/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
|
|
|
|
|
|
|
|
#include <linux/io.h>
|
2016-01-20 06:43:44 +08:00
|
|
|
#include <rdma/rdma_vt.h>
|
|
|
|
#include <rdma/rdmavt_qp.h>
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
#include "hfi.h"
|
|
|
|
#include "qp.h"
|
2019-01-24 11:29:44 +08:00
|
|
|
#include "rc.h"
|
2016-02-15 04:44:43 +08:00
|
|
|
#include "verbs_txreq.h"
|
2015-07-31 03:17:43 +08:00
|
|
|
#include "trace.h"
|
|
|
|
|
2019-01-24 11:29:44 +08:00
|
|
|
/**
 * find_prev_entry - walk the ack queue backwards looking for a PSN
 * @qp: the QP whose s_ack_queue is searched
 * @psn: the packet sequence number being looked up
 * @prev: if non-NULL, receives the ring index of the last slot examined
 * @prev_ack: if non-NULL, receives the ring index the walk stepped back
 *	      from to reach *@prev (i.e. the slot following *@prev)
 * @scheduled: if non-NULL, receives false when the walk started at or
 *	       stepped through s_tail_ack_queue, or when the matching entry
 *	       is the one at s_tail_ack_queue and
 *	       cmp_psn(@psn, entry->lpsn) <= 0; true otherwise
 *
 * The walk starts at r_head_ack_queue and moves to the previous slot on
 * every iteration, wrapping from index 0 to
 * rvt_size_atomic(ib_to_rvt(qp->ibqp.device)).  It ends when:
 *  - the previous slot is r_head_ack_queue again (whole ring visited),
 *  - the slot examined has a zero opcode, or
 *  - cmp_psn(@psn, entry->psn) >= 0 for the slot examined.
 *
 * The caller must hold qp->s_lock.
 *
 * Return: the entry for which cmp_psn(@psn, entry->psn) >= 0, or NULL if
 * the walk ended for one of the other two reasons.
 */
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
				      u8 *prev_ack, bool *scheduled)
	__must_hold(&qp->s_lock)
{
	struct rvt_ack_entry *e = NULL;
	u8 i, p;
	bool s = true;

	for (i = qp->r_head_ack_queue; ; i = p) {
		/* The walk has reached the slot currently being processed */
		if (i == qp->s_tail_ack_queue)
			s = false;
		/* Step back one slot, wrapping at the start of the ring */
		if (i)
			p = i - 1;
		else
			p = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
		/* Back at the head: every slot has been visited */
		if (p == qp->r_head_ack_queue) {
			e = NULL;
			break;
		}
		e = &qp->s_ack_queue[p];
		/* A zero opcode ends the search without a match */
		if (!e->opcode) {
			e = NULL;
			break;
		}
		if (cmp_psn(psn, e->psn) >= 0) {
			if (p == qp->s_tail_ack_queue &&
			    cmp_psn(psn, e->lpsn) <= 0)
				s = false;
			break;
		}
	}
	if (prev)
		*prev = p;
	if (prev_ack)
		*prev_ack = i;
	if (scheduled)
		*scheduled = s;
	return e;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
|
|
|
|
* @dev: the device for this QP
|
|
|
|
* @qp: a pointer to the QP
|
|
|
|
* @ohdr: a pointer to the IB header being constructed
|
2016-02-15 04:44:43 +08:00
|
|
|
* @ps: the xmit packet state
|
2015-07-31 03:17:43 +08:00
|
|
|
*
|
|
|
|
* Return 1 if constructed; otherwise, return 0.
|
|
|
|
* Note that we are in the responder's side of the QP context.
|
|
|
|
* Note the QP s_lock must be held.
|
|
|
|
*/
|
2016-01-20 06:42:28 +08:00
|
|
|
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
|
2016-09-06 19:35:05 +08:00
|
|
|
struct ib_other_headers *ohdr,
|
2016-02-15 04:44:43 +08:00
|
|
|
struct hfi1_pkt_state *ps)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_ack_entry *e;
|
2019-01-24 13:51:39 +08:00
|
|
|
u32 hwords, hdrlen;
|
2019-01-24 11:32:09 +08:00
|
|
|
u32 len = 0;
|
|
|
|
u32 bth0 = 0, bth2 = 0;
|
2019-01-24 22:09:46 +08:00
|
|
|
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
|
2015-07-31 03:17:43 +08:00
|
|
|
int middle = 0;
|
2016-02-15 04:45:09 +08:00
|
|
|
u32 pmtu = qp->pmtu;
|
2019-01-24 13:51:39 +08:00
|
|
|
struct hfi1_qp_priv *qpriv = qp->priv;
|
2019-01-24 11:32:09 +08:00
|
|
|
bool last_pkt;
|
|
|
|
u32 delta;
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions are imported in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
u8 next = qp->s_tail_ack_queue;
|
2019-01-24 13:51:39 +08:00
|
|
|
struct tid_rdma_request *req;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_rsp_make_rc_ack(qp, 0);
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* Don't send an ACK if we aren't supposed to. */
|
2016-01-20 06:43:33 +08:00
|
|
|
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
|
2017-08-05 04:54:41 +08:00
|
|
|
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
|
|
|
|
hwords = 5;
|
|
|
|
else
|
|
|
|
/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
|
|
|
|
hwords = 7;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
switch (qp->s_ack_state) {
|
|
|
|
case OP(RDMA_READ_RESPONSE_LAST):
|
|
|
|
case OP(RDMA_READ_RESPONSE_ONLY):
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
2019-03-19 00:58:30 +08:00
|
|
|
release_rdma_sge_mr(e);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(ATOMIC_ACKNOWLEDGE):
|
|
|
|
/*
|
|
|
|
* We can increment the tail pointer now that the last
|
|
|
|
* response has been sent instead of only being
|
|
|
|
* constructed.
|
|
|
|
*/
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions are imported in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (++next > rvt_size_atomic(&dev->rdi))
|
|
|
|
next = 0;
|
|
|
|
/*
|
|
|
|
* Only advance the s_acked_ack_queue pointer if there
|
|
|
|
* have been no TID RDMA requests.
|
|
|
|
*/
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
|
|
|
if (e->opcode != TID_OP(WRITE_REQ) &&
|
|
|
|
qp->s_acked_ack_queue == qp->s_tail_ack_queue)
|
|
|
|
qp->s_acked_ack_queue = next;
|
|
|
|
qp->s_tail_ack_queue = next;
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(SEND_ONLY):
|
|
|
|
case OP(ACKNOWLEDGE):
|
|
|
|
/* Check for no next entry in the queue. */
|
|
|
|
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_ACK_PENDING)
|
2015-07-31 03:17:43 +08:00
|
|
|
goto normal;
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
2019-01-24 13:51:49 +08:00
|
|
|
/* Check for tid write fence */
|
|
|
|
if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
|
|
|
|
hfi1_tid_rdma_ack_interlock(qp, e)) {
|
|
|
|
iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
|
|
|
|
goto bail;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
if (e->opcode == OP(RDMA_READ_REQUEST)) {
|
|
|
|
/*
|
|
|
|
* If a RDMA read response is being resent and
|
|
|
|
* we haven't seen the duplicate request yet,
|
|
|
|
* then stop sending the remaining responses the
|
|
|
|
* responder has seen until the requester re-sends it.
|
|
|
|
*/
|
|
|
|
len = e->rdma_sge.sge_length;
|
|
|
|
if (len && !e->rdma_sge.mr) {
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions are imported in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (qp->s_acked_ack_queue ==
|
|
|
|
qp->s_tail_ack_queue)
|
|
|
|
qp->s_acked_ack_queue =
|
|
|
|
qp->r_head_ack_queue;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_tail_ack_queue = qp->r_head_ack_queue;
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
/* Copy SGE state in case we need to resend */
|
2016-02-15 04:44:52 +08:00
|
|
|
ps->s_txreq->mr = e->rdma_sge.mr;
|
|
|
|
if (ps->s_txreq->mr)
|
|
|
|
rvt_get_mr(ps->s_txreq->mr);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_ack_rdma_sge.sge = e->rdma_sge;
|
|
|
|
qp->s_ack_rdma_sge.num_sge = 1;
|
2016-12-08 11:33:27 +08:00
|
|
|
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
|
2015-07-31 03:17:43 +08:00
|
|
|
if (len > pmtu) {
|
|
|
|
len = pmtu;
|
|
|
|
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
|
|
|
|
} else {
|
|
|
|
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
|
|
|
|
e->sent = 1;
|
|
|
|
}
|
2017-02-08 21:27:07 +08:00
|
|
|
ohdr->u.aeth = rvt_compute_aeth(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
hwords++;
|
|
|
|
qp->s_ack_rdma_psn = e->psn;
|
|
|
|
bth2 = mask_psn(qp->s_ack_rdma_psn++);
|
2019-01-24 13:51:39 +08:00
|
|
|
} else if (e->opcode == TID_OP(WRITE_REQ)) {
|
|
|
|
/*
|
|
|
|
* If a TID RDMA WRITE RESP is being resent, we have to
|
|
|
|
* wait for the actual request. All requests that are to
|
|
|
|
* be resent will have their state set to
|
|
|
|
* TID_REQUEST_RESEND. When the new request arrives, the
|
|
|
|
* state will be changed to TID_REQUEST_RESEND_ACTIVE.
|
|
|
|
*/
|
|
|
|
req = ack_to_tid_req(e);
|
|
|
|
if (req->state == TID_REQUEST_RESEND ||
|
|
|
|
req->state == TID_REQUEST_INIT_RESEND)
|
|
|
|
goto bail;
|
|
|
|
qp->s_ack_state = TID_OP(WRITE_RESP);
|
|
|
|
qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
|
|
|
|
goto write_resp;
|
2019-01-24 11:32:09 +08:00
|
|
|
} else if (e->opcode == TID_OP(READ_REQ)) {
|
|
|
|
/*
|
|
|
|
* If a TID RDMA read response is being resent and
|
|
|
|
* we haven't seen the duplicate request yet,
|
|
|
|
* then stop sending the remaining responses the
|
|
|
|
* responder has seen until the requester re-sends it.
|
|
|
|
*/
|
|
|
|
len = e->rdma_sge.sge_length;
|
|
|
|
if (len && !e->rdma_sge.mr) {
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions are imported in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (qp->s_acked_ack_queue ==
|
|
|
|
qp->s_tail_ack_queue)
|
|
|
|
qp->s_acked_ack_queue =
|
|
|
|
qp->r_head_ack_queue;
|
2019-01-24 11:32:09 +08:00
|
|
|
qp->s_tail_ack_queue = qp->r_head_ack_queue;
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
/* Copy SGE state in case we need to resend */
|
|
|
|
ps->s_txreq->mr = e->rdma_sge.mr;
|
|
|
|
if (ps->s_txreq->mr)
|
|
|
|
rvt_get_mr(ps->s_txreq->mr);
|
|
|
|
qp->s_ack_rdma_sge.sge = e->rdma_sge;
|
|
|
|
qp->s_ack_rdma_sge.num_sge = 1;
|
|
|
|
qp->s_ack_state = TID_OP(READ_RESP);
|
|
|
|
goto read_resp;
|
2015-07-31 03:17:43 +08:00
|
|
|
} else {
|
|
|
|
/* COMPARE_SWAP or FETCH_ADD */
|
2016-12-08 11:33:27 +08:00
|
|
|
ps->s_txreq->ss = NULL;
|
2015-07-31 03:17:43 +08:00
|
|
|
len = 0;
|
|
|
|
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
|
2017-02-08 21:27:07 +08:00
|
|
|
ohdr->u.at.aeth = rvt_compute_aeth(qp);
|
2016-09-06 19:35:05 +08:00
|
|
|
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
|
2015-07-31 03:17:43 +08:00
|
|
|
hwords += sizeof(ohdr->u.at) / sizeof(u32);
|
|
|
|
bth2 = mask_psn(e->psn);
|
|
|
|
e->sent = 1;
|
|
|
|
}
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_tid_write_rsp_make_rc_ack(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
bth0 = qp->s_ack_state << 24;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_FIRST):
|
|
|
|
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(RDMA_READ_RESPONSE_MIDDLE):
|
2016-12-08 11:33:27 +08:00
|
|
|
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
|
2016-02-15 04:44:52 +08:00
|
|
|
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
|
|
|
|
if (ps->s_txreq->mr)
|
|
|
|
rvt_get_mr(ps->s_txreq->mr);
|
2015-07-31 03:17:43 +08:00
|
|
|
len = qp->s_ack_rdma_sge.sge.sge_length;
|
|
|
|
if (len > pmtu) {
|
|
|
|
len = pmtu;
|
|
|
|
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
|
|
|
|
} else {
|
2017-02-08 21:27:07 +08:00
|
|
|
ohdr->u.aeth = rvt_compute_aeth(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
hwords++;
|
|
|
|
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
|
|
|
e->sent = 1;
|
|
|
|
}
|
|
|
|
bth0 = qp->s_ack_state << 24;
|
|
|
|
bth2 = mask_psn(qp->s_ack_rdma_psn++);
|
|
|
|
break;
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
case TID_OP(WRITE_RESP):
|
|
|
|
write_resp:
|
|
|
|
/*
|
|
|
|
* 1. Check if RVT_S_ACK_PENDING is set. If yes,
|
|
|
|
* goto normal.
|
|
|
|
* 2. Attempt to allocate TID resources.
|
|
|
|
* 3. Remove RVT_S_RESP_PENDING flags from s_flags
|
|
|
|
* 4. If resources not available:
|
|
|
|
* 4.1 Set RVT_S_WAIT_TID_SPACE
|
|
|
|
* 4.2 Queue QP on RCD TID queue
|
|
|
|
* 4.3 Put QP on iowait list.
|
|
|
|
* 4.4 Build IB RNR NAK with appropriate timeout value
|
|
|
|
* 4.5 Return indication progress made.
|
|
|
|
* 5. If resources are available:
|
|
|
|
* 5.1 Program HW flow CSRs
|
|
|
|
* 5.2 Build TID RDMA WRITE RESP packet
|
|
|
|
* 5.3 If more resources needed, do 2.1 - 2.3.
|
|
|
|
* 5.4 Wake up next QP on RCD TID queue.
|
|
|
|
* 5.5 Return indication progress made.
|
|
|
|
*/
|
|
|
|
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
|
|
|
req = ack_to_tid_req(e);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Send scheduled RNR NAK's. RNR NAK's need to be sent at
|
|
|
|
* segment boundaries, not at request boundaries. Don't change
|
|
|
|
* s_ack_state because we are still in the middle of a request
|
|
|
|
*/
|
|
|
|
if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
|
|
|
|
qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
|
|
|
|
req->cur_seg == req->alloc_seg) {
|
|
|
|
qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
|
|
|
|
goto normal_no_state;
|
|
|
|
}
|
|
|
|
|
|
|
|
bth2 = mask_psn(qp->s_ack_rdma_psn);
|
|
|
|
hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
|
|
|
|
bth2, &len,
|
|
|
|
&ps->s_txreq->ss);
|
|
|
|
if (!hdrlen)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
hwords += hdrlen;
|
|
|
|
bth0 = qp->s_ack_state << 24;
|
|
|
|
qp->s_ack_rdma_psn++;
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
|
|
|
|
e->lpsn, req);
|
2019-01-24 13:51:39 +08:00
|
|
|
if (req->cur_seg != req->total_segs)
|
|
|
|
break;
|
|
|
|
|
|
|
|
e->sent = 1;
|
2019-03-19 00:58:30 +08:00
|
|
|
/* Do not free e->rdma_sge until all data are received */
|
|
|
|
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
|
2019-01-24 13:51:39 +08:00
|
|
|
break;
|
|
|
|
|
2019-01-24 11:32:09 +08:00
|
|
|
case TID_OP(READ_RESP):
|
|
|
|
read_resp:
|
|
|
|
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
|
|
|
|
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
|
|
|
|
delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
|
|
|
|
&bth1, &bth2, &len,
|
|
|
|
&last_pkt);
|
|
|
|
if (delta == 0)
|
|
|
|
goto error_qp;
|
|
|
|
hwords += delta;
|
|
|
|
if (last_pkt) {
|
|
|
|
e->sent = 1;
|
|
|
|
/*
|
|
|
|
* Increment qp->s_tail_ack_queue through s_ack_state
|
|
|
|
* transition.
|
|
|
|
*/
|
|
|
|
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TID_OP(READ_REQ):
|
|
|
|
goto bail;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
default:
|
|
|
|
normal:
|
|
|
|
/*
|
|
|
|
* Send a regular ACK.
|
|
|
|
* Set the s_ack_state so we wait until after sending
|
|
|
|
* the ACK before setting s_ack_state to ACKNOWLEDGE
|
|
|
|
* (see above).
|
|
|
|
*/
|
|
|
|
qp->s_ack_state = OP(SEND_ONLY);
|
2019-01-24 13:51:39 +08:00
|
|
|
normal_no_state:
|
2015-07-31 03:17:43 +08:00
|
|
|
if (qp->s_nak_state)
|
|
|
|
ohdr->u.aeth =
|
2017-02-08 21:28:25 +08:00
|
|
|
cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
|
2015-07-31 03:17:43 +08:00
|
|
|
(qp->s_nak_state <<
|
2017-02-08 21:28:25 +08:00
|
|
|
IB_AETH_CREDIT_SHIFT));
|
2015-07-31 03:17:43 +08:00
|
|
|
else
|
2017-02-08 21:27:07 +08:00
|
|
|
ohdr->u.aeth = rvt_compute_aeth(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
hwords++;
|
|
|
|
len = 0;
|
|
|
|
bth0 = OP(ACKNOWLEDGE) << 24;
|
|
|
|
bth2 = mask_psn(qp->s_ack_psn);
|
2019-01-24 13:51:39 +08:00
|
|
|
qp->s_flags &= ~RVT_S_ACK_PENDING;
|
2019-01-24 13:52:19 +08:00
|
|
|
ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
|
2019-01-24 13:51:39 +08:00
|
|
|
ps->s_txreq->ss = NULL;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
qp->s_rdma_ack_cnt++;
|
2019-01-24 13:51:39 +08:00
|
|
|
ps->s_txreq->sde = qpriv->s_sde;
|
2016-12-08 11:33:00 +08:00
|
|
|
ps->s_txreq->s_cur_size = len;
|
2018-02-02 02:46:07 +08:00
|
|
|
ps->s_txreq->hdr_dwords = hwords;
|
2019-01-24 22:09:46 +08:00
|
|
|
hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
|
2015-07-31 03:17:43 +08:00
|
|
|
return 1;
|
2019-01-24 11:32:09 +08:00
|
|
|
error_qp:
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
|
|
|
|
spin_lock_irqsave(&qp->r_lock, ps->flags);
|
|
|
|
spin_lock(&qp->s_lock);
|
|
|
|
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
|
|
|
spin_unlock(&qp->s_lock);
|
|
|
|
spin_unlock_irqrestore(&qp->r_lock, ps->flags);
|
|
|
|
spin_lock_irqsave(&qp->s_lock, ps->flags);
|
2015-07-31 03:17:43 +08:00
|
|
|
bail:
|
|
|
|
qp->s_ack_state = OP(ACKNOWLEDGE);
|
|
|
|
/*
|
|
|
|
* Ensure s_rdma_ack_cnt changes are committed prior to resetting
|
2016-01-20 06:43:01 +08:00
|
|
|
* RVT_S_RESP_PENDING
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
|
|
|
smp_wmb();
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~(RVT_S_RESP_PENDING
|
|
|
|
| RVT_S_ACK_PENDING
|
2018-06-05 02:44:02 +08:00
|
|
|
| HFI1_S_AHG_VALID);
|
2015-07-31 03:17:43 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
|
|
|
|
* @qp: a pointer to the QP
|
|
|
|
*
|
2016-02-15 04:10:04 +08:00
|
|
|
* Assumes s_lock is held.
|
|
|
|
*
|
2015-07-31 03:17:43 +08:00
|
|
|
* Return 1 if constructed; otherwise, return 0.
|
|
|
|
*/
|
2016-02-15 04:44:43 +08:00
|
|
|
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2016-01-20 06:42:00 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
2015-07-31 03:17:43 +08:00
|
|
|
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
|
2016-09-06 19:35:05 +08:00
|
|
|
struct ib_other_headers *ohdr;
|
2019-01-24 11:32:09 +08:00
|
|
|
struct rvt_sge_state *ss = NULL;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2019-01-24 11:32:09 +08:00
|
|
|
struct hfi1_swqe_priv *wpriv;
|
|
|
|
struct tid_rdma_request *req = NULL;
|
|
|
|
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
|
|
|
|
u32 hwords = 5;
|
|
|
|
u32 len = 0;
|
|
|
|
u32 bth0 = 0, bth2 = 0;
|
2019-01-24 22:09:46 +08:00
|
|
|
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 pmtu = qp->pmtu;
|
|
|
|
char newreq;
|
|
|
|
int middle = 0;
|
|
|
|
int delta;
|
2019-01-24 11:32:09 +08:00
|
|
|
struct tid_rdma_flow *flow = NULL;
|
2019-01-24 13:51:39 +08:00
|
|
|
struct tid_rdma_params *remote;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_sender_make_rc_req(qp);
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2016-02-15 04:44:43 +08:00
|
|
|
ps->s_txreq = get_txreq(ps->dev, qp);
|
2018-06-21 00:29:08 +08:00
|
|
|
if (!ps->s_txreq)
|
2016-02-15 04:44:43 +08:00
|
|
|
goto bail_no_tx;
|
|
|
|
|
2017-08-05 04:54:41 +08:00
|
|
|
if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
|
|
|
|
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
|
|
|
|
hwords = 5;
|
|
|
|
if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
|
|
|
|
ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
|
|
|
|
else
|
|
|
|
ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
|
|
|
|
} else {
|
|
|
|
/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
|
|
|
|
hwords = 7;
|
|
|
|
if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
|
|
|
|
(hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
|
|
|
|
ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
|
|
|
|
else
|
|
|
|
ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* Sending responses has higher priority over sending requests. */
|
2016-01-20 06:43:01 +08:00
|
|
|
if ((qp->s_flags & RVT_S_RESP_PENDING) &&
|
2016-02-15 04:45:09 +08:00
|
|
|
make_rc_ack(dev, qp, ohdr, ps))
|
2016-02-15 04:44:43 +08:00
|
|
|
return 1;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-01-20 06:43:33 +08:00
|
|
|
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
|
|
|
|
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
/* We are in the error state, flush the work request. */
|
2017-02-08 21:26:08 +08:00
|
|
|
if (qp->s_last == READ_ONCE(qp->s_head))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
/* If DMAs are in progress, we can't flush immediately. */
|
2016-02-15 04:45:36 +08:00
|
|
|
if (iowait_sdma_pending(&priv->s_iowait)) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_DMA;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
clear_ahg(qp);
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
|
2019-01-24 11:32:09 +08:00
|
|
|
hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
|
|
|
|
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* will get called again */
|
2016-02-15 04:44:43 +08:00
|
|
|
goto done_free_tx;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
|
|
|
|
if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
|
|
|
|
if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_PSN;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
qp->s_sending_psn = qp->s_psn;
|
|
|
|
qp->s_sending_hpsn = qp->s_psn - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Send a request. */
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
|
2019-01-24 11:32:09 +08:00
|
|
|
check_s_state:
|
2015-07-31 03:17:43 +08:00
|
|
|
switch (qp->s_state) {
|
|
|
|
default:
|
2016-01-20 06:43:33 +08:00
|
|
|
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
/*
|
|
|
|
* Resend an old request or start a new one.
|
|
|
|
*
|
|
|
|
* We keep track of the current SWQE so that
|
|
|
|
* we don't reset the "furthest progress" state
|
|
|
|
* if we need to back up.
|
|
|
|
*/
|
|
|
|
newreq = 0;
|
|
|
|
if (qp->s_cur == qp->s_tail) {
|
|
|
|
/* Check if send work queue is empty. */
|
2017-02-08 21:26:08 +08:00
|
|
|
if (qp->s_tail == READ_ONCE(qp->s_head)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
clear_ahg(qp);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If a fence is requested, wait for previous
|
|
|
|
* RDMA read and atomic operations to finish.
|
2019-01-24 11:32:09 +08:00
|
|
|
* However, there is no need to guard against
|
|
|
|
* TID RDMA READ after TID RDMA READ.
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
|
|
|
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
|
2019-01-24 11:32:09 +08:00
|
|
|
qp->s_num_rd_atomic &&
|
|
|
|
(wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
|
|
|
|
priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_FENCE;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2016-07-26 04:38:37 +08:00
|
|
|
/*
|
|
|
|
* Local operations are processed immediately
|
|
|
|
* after all prior requests have completed
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_REG_MR ||
|
|
|
|
wqe->wr.opcode == IB_WR_LOCAL_INV) {
|
2016-07-26 04:39:45 +08:00
|
|
|
int local_ops = 0;
|
|
|
|
int err = 0;
|
|
|
|
|
2016-07-26 04:38:37 +08:00
|
|
|
if (qp->s_last != qp->s_cur)
|
|
|
|
goto bail;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
if (++qp->s_tail == qp->s_size)
|
|
|
|
qp->s_tail = 0;
|
2016-07-26 04:39:45 +08:00
|
|
|
if (!(wqe->wr.send_flags &
|
|
|
|
RVT_SEND_COMPLETION_ONLY)) {
|
2016-07-26 04:38:37 +08:00
|
|
|
err = rvt_invalidate_rkey(
|
|
|
|
qp,
|
|
|
|
wqe->wr.ex.invalidate_rkey);
|
2016-07-26 04:39:45 +08:00
|
|
|
local_ops = 1;
|
|
|
|
}
|
2018-09-27 01:44:42 +08:00
|
|
|
rvt_send_complete(qp, wqe,
|
|
|
|
err ? IB_WC_LOC_PROT_ERR
|
|
|
|
: IB_WC_SUCCESS);
|
2016-07-26 04:39:45 +08:00
|
|
|
if (local_ops)
|
|
|
|
atomic_dec(&qp->local_ops_pending);
|
2016-07-26 04:38:37 +08:00
|
|
|
goto done_free_tx;
|
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
newreq = 1;
|
2016-02-15 04:10:04 +08:00
|
|
|
qp->s_psn = wqe->psn;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Note that we have to be careful not to modify the
|
|
|
|
* original work request since we may need to resend
|
|
|
|
* it.
|
|
|
|
*/
|
|
|
|
len = wqe->length;
|
|
|
|
ss = &qp->s_sge;
|
|
|
|
bth2 = mask_psn(qp->s_psn);
|
2019-01-24 22:36:48 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Interlock between various IB requests and TID RDMA
|
|
|
|
* if necessary.
|
|
|
|
*/
|
|
|
|
if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
|
|
|
|
hfi1_tid_rdma_wqe_interlock(qp, wqe))
|
|
|
|
goto bail;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
switch (wqe->wr.opcode) {
|
|
|
|
case IB_WR_SEND:
|
|
|
|
case IB_WR_SEND_WITH_IMM:
|
2016-07-26 04:38:37 +08:00
|
|
|
case IB_WR_SEND_WITH_INV:
|
2015-07-31 03:17:43 +08:00
|
|
|
/* If no credit, return. */
|
2016-01-20 06:43:01 +08:00
|
|
|
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
|
2017-02-08 21:27:07 +08:00
|
|
|
rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (len > pmtu) {
|
|
|
|
qp->s_state = OP(SEND_FIRST);
|
|
|
|
len = pmtu;
|
|
|
|
break;
|
|
|
|
}
|
2016-02-15 12:22:00 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_SEND) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(SEND_ONLY);
|
2016-07-26 04:38:37 +08:00
|
|
|
} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
|
|
|
|
/* Immediate data comes after the BTH */
|
|
|
|
ohdr->u.imm_data = wqe->wr.ex.imm_data;
|
|
|
|
hwords += 1;
|
2016-07-26 04:38:37 +08:00
|
|
|
} else {
|
|
|
|
qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
|
|
|
|
/* Invalidate rkey comes after the BTH */
|
|
|
|
ohdr->u.ieth = cpu_to_be32(
|
|
|
|
wqe->wr.ex.invalidate_rkey);
|
|
|
|
hwords += 1;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
|
|
|
|
bth0 |= IB_BTH_SOLICITED;
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_WR_RDMA_WRITE:
|
2016-01-20 06:43:01 +08:00
|
|
|
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_lsn++;
|
2017-08-22 09:26:20 +08:00
|
|
|
goto no_flow_control;
|
2015-07-31 03:17:43 +08:00
|
|
|
case IB_WR_RDMA_WRITE_WITH_IMM:
|
|
|
|
/* If no credit, return. */
|
2016-01-20 06:43:01 +08:00
|
|
|
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
|
2017-02-08 21:27:07 +08:00
|
|
|
rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
}
|
2017-08-22 09:26:20 +08:00
|
|
|
no_flow_control:
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_reth_vaddr(
|
|
|
|
wqe->rdma_wr.remote_addr,
|
|
|
|
&ohdr->u.rc.reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.rkey =
|
2015-10-08 16:16:33 +08:00
|
|
|
cpu_to_be32(wqe->rdma_wr.rkey);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.length = cpu_to_be32(len);
|
|
|
|
hwords += sizeof(struct ib_reth) / sizeof(u32);
|
|
|
|
if (len > pmtu) {
|
|
|
|
qp->s_state = OP(RDMA_WRITE_FIRST);
|
|
|
|
len = pmtu;
|
|
|
|
break;
|
|
|
|
}
|
2016-02-15 12:22:00 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(RDMA_WRITE_ONLY);
|
2016-02-15 12:22:00 +08:00
|
|
|
} else {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state =
|
|
|
|
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
|
|
|
|
/* Immediate data comes after RETH */
|
|
|
|
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
|
|
|
|
hwords += 1;
|
|
|
|
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
|
|
|
|
bth0 |= IB_BTH_SOLICITED;
|
|
|
|
}
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
case IB_WR_TID_RDMA_WRITE:
|
|
|
|
if (newreq) {
|
|
|
|
/*
|
|
|
|
* Limit the number of TID RDMA WRITE requests.
|
|
|
|
*/
|
|
|
|
if (atomic_read(&priv->n_tid_requests) >=
|
|
|
|
HFI1_TID_RDMA_WRITE_CNT)
|
|
|
|
goto bail;
|
|
|
|
|
|
|
|
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
|
|
|
|
qp->s_lsn++;
|
|
|
|
}
|
|
|
|
|
|
|
|
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
|
|
|
|
&bth1, &bth2,
|
|
|
|
&len);
|
|
|
|
ss = NULL;
|
|
|
|
if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
|
|
|
|
priv->s_tid_cur = qp->s_cur;
|
|
|
|
if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
|
|
|
|
priv->s_tid_tail = qp->s_cur;
|
|
|
|
priv->s_state = TID_OP(WRITE_RESP);
|
|
|
|
}
|
|
|
|
} else if (priv->s_tid_cur == priv->s_tid_head) {
|
|
|
|
struct rvt_swqe *__w;
|
|
|
|
struct tid_rdma_request *__r;
|
|
|
|
|
|
|
|
__w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
|
|
|
|
__r = wqe_to_tid_req(__w);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The s_tid_cur pointer is advanced to s_cur if
|
|
|
|
* any of the following conditions about the WQE
|
|
|
|
* to which s_ti_cur currently points to are
|
|
|
|
* satisfied:
|
|
|
|
* 1. The request is not a TID RDMA WRITE
|
|
|
|
* request,
|
|
|
|
* 2. The request is in the INACTIVE or
|
|
|
|
* COMPLETE states (TID RDMA READ requests
|
|
|
|
* stay at INACTIVE and TID RDMA WRITE
|
|
|
|
* transition to COMPLETE when done),
|
|
|
|
* 3. The request is in the ACTIVE or SYNC
|
|
|
|
* state and the number of completed
|
|
|
|
* segments is equal to the total segment
|
|
|
|
* count.
|
|
|
|
* (If ACTIVE, the request is waiting for
|
|
|
|
* ACKs. If SYNC, the request has not
|
|
|
|
* received any responses because it's
|
|
|
|
* waiting on a sync point.)
|
|
|
|
*/
|
|
|
|
if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
|
|
|
|
__r->state == TID_REQUEST_INACTIVE ||
|
|
|
|
__r->state == TID_REQUEST_COMPLETE ||
|
|
|
|
((__r->state == TID_REQUEST_ACTIVE ||
|
|
|
|
__r->state == TID_REQUEST_SYNC) &&
|
|
|
|
__r->comp_seg == __r->total_segs)) {
|
|
|
|
if (priv->s_tid_tail ==
|
|
|
|
priv->s_tid_cur &&
|
|
|
|
priv->s_state ==
|
|
|
|
TID_OP(WRITE_DATA_LAST)) {
|
|
|
|
priv->s_tid_tail = qp->s_cur;
|
|
|
|
priv->s_state =
|
|
|
|
TID_OP(WRITE_RESP);
|
|
|
|
}
|
|
|
|
priv->s_tid_cur = qp->s_cur;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* A corner case: when the last TID RDMA WRITE
|
|
|
|
* request was completed, s_tid_head,
|
|
|
|
* s_tid_cur, and s_tid_tail all point to the
|
|
|
|
* same location. Other requests are posted and
|
|
|
|
* s_cur wraps around to the same location,
|
|
|
|
* where a new TID RDMA WRITE is posted. In
|
|
|
|
* this case, none of the indices need to be
|
|
|
|
* updated. However, the priv->s_state should.
|
|
|
|
*/
|
|
|
|
if (priv->s_tid_tail == qp->s_cur &&
|
|
|
|
priv->s_state == TID_OP(WRITE_DATA_LAST))
|
|
|
|
priv->s_state = TID_OP(WRITE_RESP);
|
|
|
|
}
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
if (newreq) {
|
|
|
|
priv->s_tid_head = qp->s_cur;
|
|
|
|
priv->pending_tid_w_resp += req->total_segs;
|
|
|
|
atomic_inc(&priv->n_tid_requests);
|
|
|
|
atomic_dec(&priv->n_requests);
|
|
|
|
} else {
|
|
|
|
req->state = TID_REQUEST_RESEND;
|
|
|
|
req->comp_seg = delta_psn(bth2, wqe->psn);
|
|
|
|
/*
|
|
|
|
* Pull back any segments since we are going
|
|
|
|
* to re-receive them.
|
|
|
|
*/
|
|
|
|
req->setup_head = req->clear_tail;
|
|
|
|
priv->pending_tid_w_resp +=
|
|
|
|
delta_psn(wqe->lpsn, bth2) + 1;
|
|
|
|
}
|
|
|
|
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_tid_write_sender_make_req(qp, newreq);
|
|
|
|
trace_hfi1_tid_req_make_req_write(qp, newreq,
|
|
|
|
wqe->wr.opcode,
|
|
|
|
wqe->psn, wqe->lpsn,
|
|
|
|
req);
|
2019-01-24 13:51:39 +08:00
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
case IB_WR_RDMA_READ:
|
|
|
|
/*
|
|
|
|
* Don't allow more operations to be started
|
|
|
|
* than the QP limits allow.
|
|
|
|
*/
|
2019-01-24 11:31:46 +08:00
|
|
|
if (qp->s_num_rd_atomic >=
|
|
|
|
qp->s_max_rd_atomic) {
|
|
|
|
qp->s_flags |= RVT_S_WAIT_RDMAR;
|
|
|
|
goto bail;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2019-01-24 11:31:46 +08:00
|
|
|
qp->s_num_rd_atomic++;
|
|
|
|
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
|
|
|
|
qp->s_lsn++;
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_reth_vaddr(
|
|
|
|
wqe->rdma_wr.remote_addr,
|
|
|
|
&ohdr->u.rc.reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.rkey =
|
2015-10-08 16:16:33 +08:00
|
|
|
cpu_to_be32(wqe->rdma_wr.rkey);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.length = cpu_to_be32(len);
|
|
|
|
qp->s_state = OP(RDMA_READ_REQUEST);
|
|
|
|
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
|
|
|
|
ss = NULL;
|
|
|
|
len = 0;
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
2019-01-24 11:32:09 +08:00
|
|
|
case IB_WR_TID_RDMA_READ:
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_tid_read_sender_make_req(qp, newreq);
|
2019-01-24 11:32:09 +08:00
|
|
|
wpriv = wqe->priv;
|
|
|
|
req = wqe_to_tid_req(wqe);
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_tid_req_make_req_read(qp, newreq,
|
|
|
|
wqe->wr.opcode,
|
|
|
|
wqe->psn, wqe->lpsn,
|
|
|
|
req);
|
2019-01-24 11:32:09 +08:00
|
|
|
delta = cmp_psn(qp->s_psn, wqe->psn);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't allow more operations to be started
|
|
|
|
* than the QP limits allow. We could get here under
|
|
|
|
* three conditions; (1) It's a new request; (2) We are
|
|
|
|
* sending the second or later segment of a request,
|
|
|
|
* but the qp->s_state is set to OP(RDMA_READ_REQUEST)
|
|
|
|
* when the last segment of a previous request is
|
|
|
|
* received just before this; (3) We are re-sending a
|
|
|
|
* request.
|
|
|
|
*/
|
|
|
|
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
|
|
|
|
qp->s_flags |= RVT_S_WAIT_RDMAR;
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (newreq) {
|
|
|
|
struct tid_rdma_flow *flow =
|
|
|
|
&req->flows[req->setup_head];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up s_sge as it is needed for TID
|
|
|
|
* allocation. However, if the pages have been
|
|
|
|
* walked and mapped, skip it. An earlier try
|
|
|
|
* has failed to allocate the TID entries.
|
|
|
|
*/
|
|
|
|
if (!flow->npagesets) {
|
|
|
|
qp->s_sge.sge = wqe->sg_list[0];
|
|
|
|
qp->s_sge.sg_list = wqe->sg_list + 1;
|
|
|
|
qp->s_sge.num_sge = wqe->wr.num_sge;
|
|
|
|
qp->s_sge.total_len = wqe->length;
|
|
|
|
qp->s_len = wqe->length;
|
|
|
|
req->isge = 0;
|
|
|
|
req->clear_tail = req->setup_head;
|
|
|
|
req->flow_idx = req->setup_head;
|
|
|
|
req->state = TID_REQUEST_ACTIVE;
|
|
|
|
}
|
|
|
|
} else if (delta == 0) {
|
|
|
|
/* Re-send a request */
|
|
|
|
req->cur_seg = 0;
|
|
|
|
req->comp_seg = 0;
|
|
|
|
req->ack_pending = 0;
|
|
|
|
req->flow_idx = req->clear_tail;
|
|
|
|
req->state = TID_REQUEST_RESEND;
|
|
|
|
}
|
|
|
|
req->s_next_psn = qp->s_psn;
|
|
|
|
/* Read one segment at a time */
|
|
|
|
len = min_t(u32, req->seg_len,
|
|
|
|
wqe->length - req->seg_len * req->cur_seg);
|
|
|
|
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
|
|
|
|
&bth1, &bth2,
|
|
|
|
&len);
|
|
|
|
if (delta <= 0) {
|
|
|
|
/* Wait for TID space */
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
|
|
|
|
qp->s_lsn++;
|
|
|
|
hwords += delta;
|
|
|
|
ss = &wpriv->ss;
|
|
|
|
/* Check if this is the last segment */
|
|
|
|
if (req->cur_seg >= req->total_segs &&
|
|
|
|
++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
case IB_WR_ATOMIC_CMP_AND_SWP:
|
|
|
|
case IB_WR_ATOMIC_FETCH_AND_ADD:
|
|
|
|
/*
|
|
|
|
* Don't allow more operations to be started
|
|
|
|
* than the QP limits allow.
|
|
|
|
*/
|
2019-01-24 11:31:46 +08:00
|
|
|
if (qp->s_num_rd_atomic >=
|
|
|
|
qp->s_max_rd_atomic) {
|
|
|
|
qp->s_flags |= RVT_S_WAIT_RDMAR;
|
|
|
|
goto bail;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2019-01-24 11:31:46 +08:00
|
|
|
qp->s_num_rd_atomic++;
|
2019-01-24 11:21:11 +08:00
|
|
|
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case IB_WR_OPFN:
|
|
|
|
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
|
|
|
|
qp->s_lsn++;
|
|
|
|
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
wqe->wr.opcode == IB_WR_OPFN) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(COMPARE_SWAP);
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_ateth_swap(wqe->atomic_wr.swap,
|
|
|
|
&ohdr->u.atomic_eth);
|
|
|
|
put_ib_ateth_compare(wqe->atomic_wr.compare_add,
|
|
|
|
&ohdr->u.atomic_eth);
|
2015-07-31 03:17:43 +08:00
|
|
|
} else {
|
|
|
|
qp->s_state = OP(FETCH_ADD);
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_ateth_swap(wqe->atomic_wr.compare_add,
|
|
|
|
&ohdr->u.atomic_eth);
|
|
|
|
put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
|
|
|
|
&ohdr->u.atomic_eth);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.atomic_eth.rkey = cpu_to_be32(
|
2015-10-08 16:16:33 +08:00
|
|
|
wqe->atomic_wr.rkey);
|
2015-07-31 03:17:43 +08:00
|
|
|
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
|
|
|
|
ss = NULL;
|
|
|
|
len = 0;
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
goto bail;
|
|
|
|
}
|
2019-01-24 11:32:09 +08:00
|
|
|
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
|
|
|
|
qp->s_sge.sge = wqe->sg_list[0];
|
|
|
|
qp->s_sge.sg_list = wqe->sg_list + 1;
|
|
|
|
qp->s_sge.num_sge = wqe->wr.num_sge;
|
|
|
|
qp->s_sge.total_len = wqe->length;
|
|
|
|
qp->s_len = wqe->length;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
if (newreq) {
|
|
|
|
qp->s_tail++;
|
|
|
|
if (qp->s_tail >= qp->s_size)
|
|
|
|
qp->s_tail = 0;
|
|
|
|
}
|
2019-01-24 13:51:39 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
|
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_psn = wqe->lpsn + 1;
|
2019-01-24 11:32:09 +08:00
|
|
|
else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
|
|
|
|
qp->s_psn = req->s_next_psn;
|
2016-02-15 04:10:04 +08:00
|
|
|
else
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_psn++;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_FIRST):
|
|
|
|
/*
|
|
|
|
* qp->s_state is normally set to the opcode of the
|
|
|
|
* last packet constructed for new requests and therefore
|
|
|
|
* is never set to RDMA read response.
|
|
|
|
* RDMA_READ_RESPONSE_FIRST is used by the ACK processing
|
|
|
|
* thread to indicate a SEND needs to be restarted from an
|
|
|
|
* earlier PSN without interfering with the sending thread.
|
|
|
|
* See restart_rc().
|
|
|
|
*/
|
|
|
|
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(SEND_FIRST):
|
|
|
|
qp->s_state = OP(SEND_MIDDLE);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(SEND_MIDDLE):
|
|
|
|
bth2 = mask_psn(qp->s_psn++);
|
|
|
|
ss = &qp->s_sge;
|
|
|
|
len = qp->s_len;
|
|
|
|
if (len > pmtu) {
|
|
|
|
len = pmtu;
|
|
|
|
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
|
|
|
|
break;
|
|
|
|
}
|
2016-02-15 12:22:00 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_SEND) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(SEND_LAST);
|
2016-07-26 04:38:37 +08:00
|
|
|
} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
|
|
|
|
/* Immediate data comes after the BTH */
|
|
|
|
ohdr->u.imm_data = wqe->wr.ex.imm_data;
|
|
|
|
hwords += 1;
|
2016-07-26 04:38:37 +08:00
|
|
|
} else {
|
|
|
|
qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
|
|
|
|
/* invalidate data comes after the BTH */
|
|
|
|
ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
|
|
|
|
hwords += 1;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
|
|
|
|
bth0 |= IB_BTH_SOLICITED;
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
qp->s_cur++;
|
|
|
|
if (qp->s_cur >= qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_LAST):
|
|
|
|
/*
|
|
|
|
* qp->s_state is normally set to the opcode of the
|
|
|
|
* last packet constructed for new requests and therefore
|
|
|
|
* is never set to RDMA read response.
|
|
|
|
* RDMA_READ_RESPONSE_LAST is used by the ACK processing
|
|
|
|
* thread to indicate a RDMA write needs to be restarted from
|
|
|
|
* an earlier PSN without interfering with the sending thread.
|
|
|
|
* See restart_rc().
|
|
|
|
*/
|
|
|
|
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(RDMA_WRITE_FIRST):
|
|
|
|
qp->s_state = OP(RDMA_WRITE_MIDDLE);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(RDMA_WRITE_MIDDLE):
|
|
|
|
bth2 = mask_psn(qp->s_psn++);
|
|
|
|
ss = &qp->s_sge;
|
|
|
|
len = qp->s_len;
|
|
|
|
if (len > pmtu) {
|
|
|
|
len = pmtu;
|
|
|
|
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
|
|
|
|
break;
|
|
|
|
}
|
2016-02-15 12:22:00 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(RDMA_WRITE_LAST);
|
2016-02-15 12:22:00 +08:00
|
|
|
} else {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
|
|
|
|
/* Immediate data comes after the BTH */
|
|
|
|
ohdr->u.imm_data = wqe->wr.ex.imm_data;
|
|
|
|
hwords += 1;
|
|
|
|
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
|
|
|
|
bth0 |= IB_BTH_SOLICITED;
|
|
|
|
}
|
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
qp->s_cur++;
|
|
|
|
if (qp->s_cur >= qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_MIDDLE):
|
|
|
|
/*
|
|
|
|
* qp->s_state is normally set to the opcode of the
|
|
|
|
* last packet constructed for new requests and therefore
|
|
|
|
* is never set to RDMA read response.
|
|
|
|
* RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
|
|
|
|
* thread to indicate a RDMA read needs to be restarted from
|
|
|
|
* an earlier PSN without interfering with the sending thread.
|
|
|
|
* See restart_rc().
|
|
|
|
*/
|
|
|
|
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
|
2016-09-06 19:35:05 +08:00
|
|
|
put_ib_reth_vaddr(
|
|
|
|
wqe->rdma_wr.remote_addr + len,
|
|
|
|
&ohdr->u.rc.reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.rkey =
|
2015-10-08 16:16:33 +08:00
|
|
|
cpu_to_be32(wqe->rdma_wr.rkey);
|
2015-07-31 03:17:43 +08:00
|
|
|
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
|
|
|
|
qp->s_state = OP(RDMA_READ_REQUEST);
|
|
|
|
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
|
|
|
|
bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
|
|
|
|
qp->s_psn = wqe->lpsn + 1;
|
|
|
|
ss = NULL;
|
|
|
|
len = 0;
|
|
|
|
qp->s_cur++;
|
|
|
|
if (qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
break;
|
2019-01-24 13:51:39 +08:00
|
|
|
|
|
|
|
case TID_OP(WRITE_RESP):
|
|
|
|
/*
|
|
|
|
* This value for s_state is used for restarting a TID RDMA
|
|
|
|
* WRITE request. See comment in OP(RDMA_READ_RESPONSE_MIDDLE
|
|
|
|
* for more).
|
|
|
|
*/
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
req->state = TID_REQUEST_RESEND;
|
|
|
|
rcu_read_lock();
|
|
|
|
remote = rcu_dereference(priv->tid_rdma.remote);
|
|
|
|
req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
|
|
|
|
len = wqe->length - (req->comp_seg * remote->max_len);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
bth2 = mask_psn(qp->s_psn);
|
|
|
|
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
|
|
|
|
&bth2, &len);
|
|
|
|
qp->s_psn = wqe->lpsn + 1;
|
|
|
|
ss = NULL;
|
|
|
|
qp->s_state = TID_OP(WRITE_REQ);
|
|
|
|
priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
|
|
|
|
priv->s_tid_cur = qp->s_cur;
|
|
|
|
if (++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_tid_req_make_req_write(qp, 0, wqe->wr.opcode,
|
|
|
|
wqe->psn, wqe->lpsn, req);
|
2019-01-24 13:51:39 +08:00
|
|
|
break;
|
|
|
|
|
2019-01-24 11:32:09 +08:00
|
|
|
case TID_OP(READ_RESP):
|
|
|
|
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
|
|
|
|
goto bail;
|
|
|
|
/* This is used to restart a TID read request */
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
wpriv = wqe->priv;
|
|
|
|
/*
|
|
|
|
* Back down. The field qp->s_psn has been set to the psn with
|
|
|
|
* which the request should be restart. It's OK to use division
|
|
|
|
* as this is on the retry path.
|
|
|
|
*/
|
|
|
|
req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following function need to be redefined to return the
|
|
|
|
* status to make sure that we find the flow. At the same
|
|
|
|
* time, we can use the req->state change to check if the
|
|
|
|
* call succeeds or not.
|
|
|
|
*/
|
|
|
|
req->state = TID_REQUEST_RESEND;
|
|
|
|
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
|
|
|
|
if (req->state != TID_REQUEST_ACTIVE) {
|
|
|
|
/*
|
|
|
|
* Failed to find the flow. Release all allocated tid
|
|
|
|
* resources.
|
|
|
|
*/
|
|
|
|
hfi1_kern_exp_rcv_clear_all(req);
|
|
|
|
hfi1_kern_clear_hw_flow(priv->rcd, qp);
|
|
|
|
|
|
|
|
hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
req->state = TID_REQUEST_RESEND;
|
|
|
|
len = min_t(u32, req->seg_len,
|
|
|
|
wqe->length - req->seg_len * req->cur_seg);
|
|
|
|
flow = &req->flows[req->flow_idx];
|
|
|
|
len -= flow->sent;
|
|
|
|
req->s_next_psn = flow->flow_state.ib_lpsn + 1;
|
|
|
|
delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
|
|
|
|
&bth2, &len);
|
|
|
|
if (delta <= 0) {
|
|
|
|
/* Wait for TID space */
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
hwords += delta;
|
|
|
|
ss = &wpriv->ss;
|
|
|
|
/* Check if this is the last segment */
|
|
|
|
if (req->cur_seg >= req->total_segs &&
|
|
|
|
++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
qp->s_psn = req->s_next_psn;
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
|
|
|
|
wqe->psn, wqe->lpsn, req);
|
2019-01-24 11:32:09 +08:00
|
|
|
break;
|
|
|
|
case TID_OP(READ_REQ):
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
delta = cmp_psn(qp->s_psn, wqe->psn);
|
|
|
|
/*
|
|
|
|
* If the current WR is not TID RDMA READ, or this is the start
|
|
|
|
* of a new request, we need to change the qp->s_state so that
|
|
|
|
* the request can be set up properly.
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
|
|
|
|
qp->s_cur == qp->s_tail) {
|
|
|
|
qp->s_state = OP(RDMA_READ_REQUEST);
|
|
|
|
if (delta == 0 || qp->s_cur == qp->s_tail)
|
|
|
|
goto check_s_state;
|
|
|
|
else
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Rate limiting */
|
|
|
|
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
|
|
|
|
qp->s_flags |= RVT_S_WAIT_RDMAR;
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
wpriv = wqe->priv;
|
|
|
|
/* Read one segment at a time */
|
|
|
|
len = min_t(u32, req->seg_len,
|
|
|
|
wqe->length - req->seg_len * req->cur_seg);
|
|
|
|
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
|
|
|
|
&bth2, &len);
|
|
|
|
if (delta <= 0) {
|
|
|
|
/* Wait for TID space */
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
hwords += delta;
|
|
|
|
ss = &wpriv->ss;
|
|
|
|
/* Check if this is the last segment */
|
|
|
|
if (req->cur_seg >= req->total_segs &&
|
|
|
|
++qp->s_cur == qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
qp->s_psn = req->s_next_psn;
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
|
|
|
|
wqe->psn, wqe->lpsn, req);
|
2019-01-24 11:32:09 +08:00
|
|
|
break;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
qp->s_sending_hpsn = bth2;
|
|
|
|
delta = delta_psn(bth2, wqe->psn);
|
2019-01-24 13:51:39 +08:00
|
|
|
if (delta && delta % HFI1_PSN_CREDIT == 0 &&
|
|
|
|
wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
|
2015-07-31 03:17:43 +08:00
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_SEND_ONE) {
|
|
|
|
qp->s_flags &= ~RVT_S_SEND_ONE;
|
|
|
|
qp->s_flags |= RVT_S_WAIT_ACK;
|
2015-07-31 03:17:43 +08:00
|
|
|
bth2 |= IB_BTH_REQ_ACK;
|
|
|
|
}
|
|
|
|
qp->s_len -= len;
|
2018-02-02 02:46:07 +08:00
|
|
|
ps->s_txreq->hdr_dwords = hwords;
|
2016-02-15 04:45:36 +08:00
|
|
|
ps->s_txreq->sde = priv->s_sde;
|
2016-12-08 11:33:27 +08:00
|
|
|
ps->s_txreq->ss = ss;
|
2016-12-08 11:33:00 +08:00
|
|
|
ps->s_txreq->s_cur_size = len;
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_make_ruc_header(
|
|
|
|
qp,
|
|
|
|
ohdr,
|
|
|
|
bth0 | (qp->s_state << 24),
|
2019-01-24 22:09:46 +08:00
|
|
|
bth1,
|
2015-07-31 03:17:43 +08:00
|
|
|
bth2,
|
2016-02-15 04:44:43 +08:00
|
|
|
middle,
|
|
|
|
ps);
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
done_free_tx:
|
|
|
|
hfi1_put_txreq(ps->s_txreq);
|
|
|
|
ps->s_txreq = NULL;
|
2016-02-15 04:10:04 +08:00
|
|
|
return 1;
|
2016-02-15 04:44:43 +08:00
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
bail:
|
2016-02-15 04:44:43 +08:00
|
|
|
hfi1_put_txreq(ps->s_txreq);
|
|
|
|
|
|
|
|
bail_no_tx:
|
|
|
|
ps->s_txreq = NULL;
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~RVT_S_BUSY;
|
2019-01-24 13:51:39 +08:00
|
|
|
/*
|
|
|
|
* If we didn't get a txreq, the QP will be woken up later to try
|
|
|
|
* again. Set the flags to indicate which work item to wake
|
|
|
|
* up.
|
|
|
|
*/
|
|
|
|
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
|
2016-02-15 04:44:43 +08:00
|
|
|
return 0;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
2017-08-05 04:54:41 +08:00
|
|
|
/*
 * hfi1_make_bth_aeth - fill in the AETH and the three BTH words of an ACK
 * @qp: the QP the ACK is built for
 * @ohdr: the header to fill in
 * @bth0: host-order value for BTH word 0
 * @bth1: host-order value OR-ed with the remote QPN to form BTH word 1
 *
 * When a NAK state is recorded in qp->r_nak_state the AETH is built from
 * that state and the masked qp->r_msn; otherwise rvt_compute_aeth()
 * supplies it.  BTH word 2 carries the masked qp->r_ack_psn.
 */
static inline void hfi1_make_bth_aeth(struct rvt_qp *qp,
				      struct ib_other_headers *ohdr,
				      u32 bth0, u32 bth1)
{
	if (!qp->r_nak_state) {
		ohdr->u.aeth = rvt_compute_aeth(qp);
	} else {
		u32 msn = qp->r_msn & IB_MSN_MASK;
		u32 syndrome = qp->r_nak_state << IB_AETH_CREDIT_SHIFT;

		ohdr->u.aeth = cpu_to_be32(msn | syndrome);
	}

	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
}
|
|
|
|
|
2018-02-02 02:46:31 +08:00
|
|
|
/*
 * hfi1_queue_rc_ack - defer an RC ACK to the send engine
 * @packet: the received packet; supplies the QP and the receive context
 * @is_fecn: when true, RVT_S_ECN is also set in qp->s_flags
 *
 * Takes qp->s_lock.  If the QP state still allows receive processing,
 * the per-CPU rc_qacks counter is bumped, the ACK/response pending flags
 * are set, the receive-side NAK state and ACK PSN are copied to their
 * send-side counterparts, and the send engine is scheduled.  Otherwise
 * nothing is changed.
 */
static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn)
{
	struct rvt_qp *qp = packet->qp;
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);

		this_cpu_inc(*ibp->rvp.rc_qacks);
		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
		qp->s_nak_state = qp->r_nak_state;
		qp->s_ack_psn = qp->r_ack_psn;
		if (is_fecn)
			qp->s_flags |= RVT_S_ECN;

		/* Schedule the send tasklet. */
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
}
|
|
|
|
|
2018-02-02 02:46:31 +08:00
|
|
|
static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_opa_header *opa_hdr,
|
|
|
|
u8 sc5, bool is_fecn,
|
|
|
|
u64 *pbc_flags, u32 *hwords,
|
|
|
|
u32 *nwords)
|
|
|
|
{
|
2018-02-02 02:46:31 +08:00
|
|
|
struct rvt_qp *qp = packet->qp;
|
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
|
|
|
struct ib_header *hdr = &opa_hdr->ibh;
|
|
|
|
struct ib_other_headers *ohdr;
|
|
|
|
u16 lrh0 = HFI1_LRH_BTH;
|
|
|
|
u16 pkey;
|
|
|
|
u32 bth0, bth1;
|
|
|
|
|
|
|
|
opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
|
|
|
|
ohdr = &hdr->u.oth;
|
|
|
|
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
|
|
|
|
*hwords = 6;
|
|
|
|
|
|
|
|
if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
|
|
|
|
*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
|
|
|
|
rdma_ah_read_grh(&qp->remote_ah_attr),
|
|
|
|
*hwords - 2, SIZE_OF_CRC);
|
|
|
|
ohdr = &hdr->u.l.oth;
|
|
|
|
lrh0 = HFI1_LRH_GRH;
|
|
|
|
}
|
|
|
|
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
|
|
|
|
*pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
|
|
|
|
|
|
|
|
/* read pkey_index w/o lock (its atomic) */
|
|
|
|
pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
|
|
|
|
|
|
|
|
lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
|
|
|
|
(rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
|
|
|
|
IB_SL_SHIFT;
|
|
|
|
|
|
|
|
hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
|
|
|
|
opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
|
|
|
|
ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));
|
|
|
|
|
|
|
|
bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
|
|
|
|
if (qp->s_mig_state == IB_MIG_MIGRATED)
|
|
|
|
bth0 |= IB_BTH_MIG_REQ;
|
|
|
|
bth1 = (!!is_fecn) << IB_BECN_SHIFT;
|
2019-01-24 22:09:46 +08:00
|
|
|
/*
|
|
|
|
* Inline ACKs go out without the use of the Verbs send engine, so
|
|
|
|
* we need to set the STL Verbs Extended bit here
|
|
|
|
*/
|
|
|
|
bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT;
|
2017-08-05 04:54:41 +08:00
|
|
|
hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
|
|
|
|
}
|
|
|
|
|
2018-02-02 02:46:31 +08:00
|
|
|
static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_opa_header *opa_hdr,
|
|
|
|
u8 sc5, bool is_fecn,
|
|
|
|
u64 *pbc_flags, u32 *hwords,
|
|
|
|
u32 *nwords)
|
|
|
|
{
|
2018-02-02 02:46:31 +08:00
|
|
|
struct rvt_qp *qp = packet->qp;
|
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
|
|
|
struct hfi1_16b_header *hdr = &opa_hdr->opah;
|
|
|
|
struct ib_other_headers *ohdr;
|
2017-11-14 20:34:23 +08:00
|
|
|
u32 bth0, bth1 = 0;
|
2017-08-05 04:54:41 +08:00
|
|
|
u16 len, pkey;
|
2018-02-02 02:46:38 +08:00
|
|
|
bool becn = is_fecn;
|
2017-08-05 04:54:41 +08:00
|
|
|
u8 l4 = OPA_16B_L4_IB_LOCAL;
|
|
|
|
u8 extra_bytes;
|
|
|
|
|
|
|
|
opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
|
|
|
|
ohdr = &hdr->u.oth;
|
|
|
|
/* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
|
|
|
|
*hwords = 8;
|
|
|
|
extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
|
|
|
|
*nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);
|
|
|
|
|
|
|
|
if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
|
|
|
|
hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
|
|
|
|
*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
|
|
|
|
rdma_ah_read_grh(&qp->remote_ah_attr),
|
|
|
|
*hwords - 4, *nwords);
|
|
|
|
ohdr = &hdr->u.l.oth;
|
|
|
|
l4 = OPA_16B_L4_IB_GLOBAL;
|
|
|
|
}
|
|
|
|
*pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
|
|
|
|
|
|
|
|
/* read pkey_index w/o lock (its atomic) */
|
|
|
|
pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
|
|
|
|
|
|
|
|
/* Convert dwords to flits */
|
|
|
|
len = (*hwords + *nwords) >> 1;
|
|
|
|
|
2017-11-14 20:34:16 +08:00
|
|
|
hfi1_make_16b_hdr(hdr, ppd->lid |
|
|
|
|
(rdma_ah_get_path_bits(&qp->remote_ah_attr) &
|
|
|
|
((1 << ppd->lmc) - 1)),
|
2017-08-05 04:54:41 +08:00
|
|
|
opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
|
2017-11-14 20:34:16 +08:00
|
|
|
16B), len, pkey, becn, 0, l4, sc5);
|
2017-08-05 04:54:41 +08:00
|
|
|
|
|
|
|
bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
|
|
|
|
bth0 |= extra_bytes << 20;
|
|
|
|
if (qp->s_mig_state == IB_MIG_MIGRATED)
|
|
|
|
bth1 = OPA_BTH_MIG_REQ;
|
|
|
|
hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
|
|
|
|
}
|
|
|
|
|
2018-02-02 02:46:31 +08:00
|
|
|
typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_opa_header *opa_hdr,
|
|
|
|
u8 sc5, bool is_fecn,
|
|
|
|
u64 *pbc_flags, u32 *hwords,
|
|
|
|
u32 *nwords);
|
|
|
|
|
|
|
|
/* We support only two types - 9B and 16B for now */
|
|
|
|
static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
|
|
|
|
[HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
|
|
|
|
[HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
|
|
|
|
};
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/**
|
|
|
|
* hfi1_send_rc_ack - Construct an ACK packet and send it
|
|
|
|
* @qp: a pointer to the QP
|
|
|
|
*
|
|
|
|
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
|
|
|
|
* Note that RDMA reads and atomics are handled in the
|
2016-09-25 22:42:08 +08:00
|
|
|
* send side QP state and send engine.
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
2018-02-02 02:46:31 +08:00
|
|
|
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2018-02-02 02:46:31 +08:00
|
|
|
struct hfi1_ctxtdata *rcd = packet->rcd;
|
|
|
|
struct rvt_qp *qp = packet->qp;
|
2017-02-08 21:26:25 +08:00
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
2015-07-31 03:17:43 +08:00
|
|
|
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
2017-08-05 04:54:41 +08:00
|
|
|
u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
|
2015-07-31 03:17:43 +08:00
|
|
|
u64 pbc, pbc_flags = 0;
|
2017-08-05 04:54:41 +08:00
|
|
|
u32 hwords = 0;
|
|
|
|
u32 nwords = 0;
|
|
|
|
u32 plen;
|
2015-07-31 03:17:43 +08:00
|
|
|
struct pio_buf *pbuf;
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_opa_header opa_hdr;
|
2017-02-08 21:25:56 +08:00
|
|
|
|
|
|
|
/* clear the defer count */
|
2017-05-04 20:14:04 +08:00
|
|
|
qp->r_adefered = 0;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
|
2017-08-05 04:54:41 +08:00
|
|
|
if (qp->s_flags & RVT_S_RESP_PENDING) {
|
2018-02-02 02:46:31 +08:00
|
|
|
hfi1_queue_rc_ack(packet, is_fecn);
|
2017-08-05 04:54:41 +08:00
|
|
|
return;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* Ensure s_rdma_ack_cnt changes are committed */
|
2017-08-05 04:54:41 +08:00
|
|
|
if (qp->s_rdma_ack_cnt) {
|
2018-02-02 02:46:31 +08:00
|
|
|
hfi1_queue_rc_ack(packet, is_fecn);
|
2017-08-05 04:54:41 +08:00
|
|
|
return;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Don't try to send ACKs if the link isn't ACTIVE */
|
|
|
|
if (driver_lstate(ppd) != IB_PORT_ACTIVE)
|
|
|
|
return;
|
|
|
|
|
2017-08-05 04:54:41 +08:00
|
|
|
/* Make the appropriate header */
|
2018-02-02 02:46:31 +08:00
|
|
|
hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
|
2017-08-05 04:54:41 +08:00
|
|
|
&pbc_flags, &hwords, &nwords);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2017-08-05 04:54:41 +08:00
|
|
|
plen = 2 /* PBC */ + hwords + nwords;
|
|
|
|
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
|
|
|
|
sc_to_vlt(ppd->dd, sc5), plen);
|
|
|
|
pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
|
2019-06-15 00:33:06 +08:00
|
|
|
if (IS_ERR_OR_NULL(pbuf)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* We have no room to send at the moment. Pass
|
2016-09-25 22:42:08 +08:00
|
|
|
* responsibility for sending the ACK to the send engine
|
2015-07-31 03:17:43 +08:00
|
|
|
* so that when enough buffer space becomes available,
|
|
|
|
* the ACK is sent ahead of other outgoing packets.
|
|
|
|
*/
|
2018-02-02 02:46:31 +08:00
|
|
|
hfi1_queue_rc_ack(packet, is_fecn);
|
2017-08-05 04:54:41 +08:00
|
|
|
return;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2017-05-13 00:20:08 +08:00
|
|
|
trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
|
2017-08-05 04:54:41 +08:00
|
|
|
&opa_hdr, ib_is_sc5(sc5));
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* write the pbc and data */
|
2017-08-05 04:54:41 +08:00
|
|
|
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
|
|
|
|
(priv->hdr_type == HFI1_PKT_TYPE_9B ?
|
|
|
|
(void *)&opa_hdr.ibh :
|
|
|
|
(void *)&opa_hdr.opah), hwords);
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-01-24 11:31:46 +08:00
|
|
|
/**
|
|
|
|
* update_num_rd_atomic - update the qp->s_num_rd_atomic
|
|
|
|
* @qp: the QP
|
|
|
|
* @psn: the packet sequence number to restart at
|
|
|
|
* @wqe: the wqe
|
|
|
|
*
|
|
|
|
* This is called from reset_psn() to update qp->s_num_rd_atomic
|
|
|
|
* for the current wqe.
|
|
|
|
* Called at interrupt level with the QP s_lock held.
|
|
|
|
*/
|
|
|
|
static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
|
|
|
|
struct rvt_swqe *wqe)
|
|
|
|
{
|
|
|
|
u32 opcode = wqe->wr.opcode;
|
|
|
|
|
|
|
|
if (opcode == IB_WR_RDMA_READ ||
|
|
|
|
opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
|
|
|
|
qp->s_num_rd_atomic++;
|
|
|
|
} else if (opcode == IB_WR_TID_RDMA_READ) {
|
|
|
|
struct tid_rdma_request *req = wqe_to_tid_req(wqe);
|
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
|
|
|
|
|
|
|
if (cmp_psn(psn, wqe->lpsn) <= 0) {
|
|
|
|
u32 cur_seg;
|
|
|
|
|
|
|
|
cur_seg = (psn - wqe->psn) / priv->pkts_ps;
|
|
|
|
req->ack_pending = cur_seg - req->comp_seg;
|
|
|
|
priv->pending_tid_r_segs += req->ack_pending;
|
|
|
|
qp->s_num_rd_atomic += req->ack_pending;
|
|
|
|
} else {
|
|
|
|
priv->pending_tid_r_segs += req->total_segs;
|
|
|
|
qp->s_num_rd_atomic += req->total_segs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/**
|
|
|
|
* reset_psn - reset the QP state to send starting from PSN
|
|
|
|
* @qp: the QP
|
|
|
|
* @psn: the packet sequence number to restart at
|
|
|
|
*
|
|
|
|
* This is called from hfi1_rc_rcv() to process an incoming RC ACK
|
|
|
|
* for the given QP.
|
|
|
|
* Called at interrupt level with the QP s_lock held.
|
|
|
|
*/
|
2016-01-20 06:42:28 +08:00
|
|
|
static void reset_psn(struct rvt_qp *qp, u32 psn)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
|
|
|
u32 n = qp->s_acked;
|
2016-01-20 06:43:33 +08:00
|
|
|
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 opcode;
|
2019-01-24 11:31:46 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_cur = n;
|
2019-01-24 11:31:46 +08:00
|
|
|
priv->pending_tid_r_segs = 0;
|
2019-01-24 13:51:39 +08:00
|
|
|
priv->pending_tid_w_resp = 0;
|
2019-01-24 11:31:46 +08:00
|
|
|
qp->s_num_rd_atomic = 0;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we are starting the request from the beginning,
|
|
|
|
* let the normal send code handle initialization.
|
|
|
|
*/
|
|
|
|
if (cmp_psn(psn, wqe->psn) <= 0) {
|
|
|
|
qp->s_state = OP(SEND_LAST);
|
|
|
|
goto done;
|
|
|
|
}
|
2019-01-24 11:31:46 +08:00
|
|
|
update_num_rd_atomic(qp, psn, wqe);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* Find the work request opcode corresponding to the given PSN. */
|
|
|
|
for (;;) {
|
|
|
|
int diff;
|
|
|
|
|
|
|
|
if (++n == qp->s_size)
|
|
|
|
n = 0;
|
|
|
|
if (n == qp->s_tail)
|
|
|
|
break;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, n);
|
2015-07-31 03:17:43 +08:00
|
|
|
diff = cmp_psn(psn, wqe->psn);
|
2019-01-24 11:31:46 +08:00
|
|
|
if (diff < 0) {
|
|
|
|
/* Point wqe back to the previous one*/
|
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
2019-01-24 11:31:46 +08:00
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_cur = n;
|
|
|
|
/*
|
|
|
|
* If we are starting the request from the beginning,
|
|
|
|
* let the normal send code handle initialization.
|
|
|
|
*/
|
|
|
|
if (diff == 0) {
|
|
|
|
qp->s_state = OP(SEND_LAST);
|
|
|
|
goto done;
|
|
|
|
}
|
2019-01-24 11:31:46 +08:00
|
|
|
|
|
|
|
update_num_rd_atomic(qp, psn, wqe);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2019-01-24 11:31:46 +08:00
|
|
|
opcode = wqe->wr.opcode;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the state to restart in the middle of a request.
|
|
|
|
* Don't change the s_sge, s_cur_sge, or s_cur_size.
|
|
|
|
* See hfi1_make_rc_req().
|
|
|
|
*/
|
|
|
|
switch (opcode) {
|
|
|
|
case IB_WR_SEND:
|
|
|
|
case IB_WR_SEND_WITH_IMM:
|
|
|
|
qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_WR_RDMA_WRITE:
|
|
|
|
case IB_WR_RDMA_WRITE_WITH_IMM:
|
|
|
|
qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
|
|
|
|
break;
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
case IB_WR_TID_RDMA_WRITE:
|
|
|
|
qp->s_state = TID_OP(WRITE_RESP);
|
|
|
|
break;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
case IB_WR_RDMA_READ:
|
|
|
|
qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
|
|
|
|
break;
|
|
|
|
|
2019-01-24 11:31:46 +08:00
|
|
|
case IB_WR_TID_RDMA_READ:
|
|
|
|
qp->s_state = TID_OP(READ_RESP);
|
|
|
|
break;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* This case shouldn't happen since its only
|
|
|
|
* one PSN per req.
|
|
|
|
*/
|
|
|
|
qp->s_state = OP(SEND_LAST);
|
|
|
|
}
|
|
|
|
done:
|
2019-01-24 22:36:48 +08:00
|
|
|
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_psn = psn;
|
|
|
|
/*
|
2016-01-20 06:43:01 +08:00
|
|
|
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
|
2016-09-25 22:42:08 +08:00
|
|
|
* asynchronously before the send engine can get scheduled.
|
2015-07-31 03:17:43 +08:00
|
|
|
* Doing it in hfi1_make_rc_req() is too late.
|
|
|
|
*/
|
|
|
|
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
|
|
|
|
(cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_WAIT_PSN;
|
2018-06-05 02:44:02 +08:00
|
|
|
qp->s_flags &= ~HFI1_S_AHG_VALID;
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_sender_reset_psn(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Back up requester to resend the last un-ACKed request.
|
|
|
|
* The QP r_lock and s_lock should be held and interrupts disabled.
|
|
|
|
*/
|
2017-02-08 21:27:19 +08:00
|
|
|
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2019-01-24 11:21:11 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
2016-01-20 06:43:33 +08:00
|
|
|
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
struct hfi1_ibport *ibp;
|
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->r_lock);
|
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_sender_restart_rc(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (qp->s_retry == 0) {
|
|
|
|
if (qp->s_mig_state == IB_MIG_ARMED) {
|
|
|
|
hfi1_migrate_qp(qp);
|
|
|
|
qp->s_retry = qp->s_retry_cnt;
|
|
|
|
} else if (qp->s_last == qp->s_acked) {
|
2019-01-24 11:21:11 +08:00
|
|
|
/*
|
|
|
|
* We need special handling for the OPFN request WQEs as
|
|
|
|
* they are not allowed to generate real user errors
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_OPFN) {
|
|
|
|
struct hfi1_ibport *ibp =
|
|
|
|
to_iport(qp->ibqp.device, qp->port_num);
|
|
|
|
/*
|
|
|
|
* Call opfn_conn_reply() with capcode and
|
|
|
|
* remaining data as 0 to close out the
|
|
|
|
* current request
|
|
|
|
*/
|
|
|
|
opfn_conn_reply(qp, priv->opfn.curr);
|
|
|
|
wqe = do_rc_completion(qp, wqe, ibp);
|
|
|
|
qp->s_flags &= ~RVT_S_WAIT_ACK;
|
|
|
|
} else {
|
2019-01-24 13:52:09 +08:00
|
|
|
trace_hfi1_tid_write_sender_restart_rc(qp, 0);
|
2019-01-24 11:31:46 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
|
|
|
|
struct tid_rdma_request *req;
|
|
|
|
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
hfi1_kern_exp_rcv_clear_all(req);
|
|
|
|
hfi1_kern_clear_hw_flow(priv->rcd, qp);
|
|
|
|
}
|
|
|
|
|
2019-01-24 11:32:09 +08:00
|
|
|
hfi1_trdma_send_complete(qp, wqe,
|
|
|
|
IB_WC_RETRY_EXC_ERR);
|
2019-01-24 11:21:11 +08:00
|
|
|
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
2016-02-15 12:22:00 +08:00
|
|
|
} else { /* need to handle delayed completion */
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
2016-02-15 12:22:00 +08:00
|
|
|
}
|
|
|
|
} else {
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_retry--;
|
2016-02-15 12:22:00 +08:00
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
ibp = to_iport(qp->ibqp.device, qp->port_num);
|
2019-01-24 11:31:46 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
|
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_READ)
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_resends++;
|
2015-07-31 03:17:43 +08:00
|
|
|
else
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
|
|
|
|
RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
|
2019-01-24 13:51:39 +08:00
|
|
|
RVT_S_WAIT_ACK | HFI1_S_WAIT_TID_RESP);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (wait)
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_SEND_ONE;
|
2015-07-31 03:17:43 +08:00
|
|
|
reset_psn(qp, psn);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set qp->s_sending_psn to the next PSN after the given one.
|
2019-01-24 13:51:39 +08:00
|
|
|
* This would be psn+1 except when RDMA reads or TID RDMA ops
|
|
|
|
* are present.
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
2016-01-20 06:42:28 +08:00
|
|
|
static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 n = qp->s_last;
|
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* Find the work request corresponding to the given PSN. */
|
|
|
|
for (;;) {
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, n);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (cmp_psn(psn, wqe->lpsn) <= 0) {
|
2019-01-24 11:32:09 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
|
2019-01-24 13:51:39 +08:00
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
|
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_sending_psn = wqe->lpsn + 1;
|
|
|
|
else
|
|
|
|
qp->s_sending_psn = psn + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (++n == qp->s_size)
|
|
|
|
n = 0;
|
|
|
|
if (n == qp->s_tail)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-15 00:32:44 +08:00
|
|
|
/**
|
|
|
|
* hfi1_rc_verbs_aborted - handle abort status
|
|
|
|
* @qp: the QP
|
|
|
|
* @opah: the opa header
|
|
|
|
*
|
|
|
|
* This code modifies both ACK bit in BTH[2]
|
|
|
|
* and the s_flags to go into send one mode.
|
|
|
|
*
|
|
|
|
* This serves to throttle the send engine to only
|
|
|
|
* send a single packet in the likely case the
|
|
|
|
* a link has gone down.
|
|
|
|
*/
|
|
|
|
void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah)
|
|
|
|
{
|
|
|
|
struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah);
|
|
|
|
u8 opcode = ib_bth_get_opcode(ohdr);
|
|
|
|
u32 psn;
|
|
|
|
|
|
|
|
/* ignore responses */
|
|
|
|
if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
|
|
|
|
opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
|
|
|
|
opcode == TID_OP(READ_RESP) ||
|
|
|
|
opcode == TID_OP(WRITE_RESP))
|
|
|
|
return;
|
|
|
|
|
|
|
|
psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK;
|
|
|
|
ohdr->bth[2] = cpu_to_be32(psn);
|
|
|
|
qp->s_flags |= RVT_S_SEND_ONE;
|
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* This should be called with the QP s_lock held and interrupts disabled.
|
|
|
|
*/
|
2017-08-05 04:54:04 +08:00
|
|
|
void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2016-09-06 19:35:05 +08:00
|
|
|
struct ib_other_headers *ohdr;
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2019-01-24 13:51:39 +08:00
|
|
|
u32 opcode, head, tail;
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 psn;
|
2019-01-24 13:51:39 +08:00
|
|
|
struct tid_rdma_request *req;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2017-02-08 21:27:49 +08:00
|
|
|
if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
2019-06-15 00:32:38 +08:00
|
|
|
ohdr = hfi1_get_rc_ohdr(opah);
|
2017-04-10 01:16:22 +08:00
|
|
|
opcode = ib_bth_get_opcode(ohdr);
|
2019-01-24 11:32:09 +08:00
|
|
|
if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
|
|
|
|
opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
|
2019-01-24 13:51:39 +08:00
|
|
|
opcode == TID_OP(READ_RESP) ||
|
|
|
|
opcode == TID_OP(WRITE_RESP)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
WARN_ON(!qp->s_rdma_ack_cnt);
|
|
|
|
qp->s_rdma_ack_cnt--;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-05-13 00:19:55 +08:00
|
|
|
psn = ib_bth_get_psn(ohdr);
|
2019-01-24 13:51:39 +08:00
|
|
|
/*
|
|
|
|
* Don't attempt to reset the sending PSN for packets in the
|
|
|
|
* KDETH PSN space since the PSN does not match anything.
|
|
|
|
*/
|
|
|
|
if (opcode != TID_OP(WRITE_DATA) &&
|
|
|
|
opcode != TID_OP(WRITE_DATA_LAST) &&
|
|
|
|
opcode != TID_OP(ACK) && opcode != TID_OP(RESYNC))
|
|
|
|
reset_sending_psn(qp, psn);
|
|
|
|
|
|
|
|
/* Handle TID RDMA WRITE packets differently */
|
|
|
|
if (opcode >= TID_OP(WRITE_REQ) &&
|
|
|
|
opcode <= TID_OP(WRITE_DATA_LAST)) {
|
|
|
|
head = priv->s_tid_head;
|
|
|
|
tail = priv->s_tid_cur;
|
|
|
|
/*
|
|
|
|
* s_tid_cur is set to s_tid_head in the case, where
|
|
|
|
* a new TID RDMA request is being started and all
|
|
|
|
* previous ones have been completed.
|
|
|
|
* Therefore, we need to do a secondary check in order
|
|
|
|
* to properly determine whether we should start the
|
|
|
|
* RC timer.
|
|
|
|
*/
|
|
|
|
wqe = rvt_get_swqe_ptr(qp, tail);
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
if (head == tail && req->comp_seg < req->total_segs) {
|
|
|
|
if (tail == 0)
|
|
|
|
tail = qp->s_size - 1;
|
|
|
|
else
|
|
|
|
tail -= 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
head = qp->s_tail;
|
|
|
|
tail = qp->s_acked;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Start timer after a packet requesting an ACK has been sent and
|
|
|
|
* there are still requests that haven't been acked.
|
|
|
|
*/
|
2019-01-24 13:51:39 +08:00
|
|
|
if ((psn & IB_BTH_REQ_ACK) && tail != head &&
|
|
|
|
opcode != TID_OP(WRITE_DATA) && opcode != TID_OP(WRITE_DATA_LAST) &&
|
|
|
|
opcode != TID_OP(RESYNC) &&
|
2015-07-31 03:17:43 +08:00
|
|
|
!(qp->s_flags &
|
2019-01-24 13:51:39 +08:00
|
|
|
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
|
|
|
|
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
|
2019-01-24 11:32:09 +08:00
|
|
|
if (opcode == TID_OP(READ_REQ))
|
|
|
|
rvt_add_retry_timer_ext(qp, priv->timeout_shift);
|
|
|
|
else
|
|
|
|
rvt_add_retry_timer(qp);
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
/* Start TID RDMA ACK timer */
|
|
|
|
if ((opcode == TID_OP(WRITE_DATA) ||
|
|
|
|
opcode == TID_OP(WRITE_DATA_LAST) ||
|
|
|
|
opcode == TID_OP(RESYNC)) &&
|
|
|
|
(psn & IB_BTH_REQ_ACK) &&
|
|
|
|
!(priv->s_flags & HFI1_S_TID_RETRY_TIMER) &&
|
|
|
|
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
|
|
|
|
/*
|
|
|
|
* The TID RDMA ACK packet could be received before this
|
|
|
|
* function is called. Therefore, add the timer only if TID
|
|
|
|
* RDMA ACK packets are actually pending.
|
|
|
|
*/
|
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
|
|
|
req = wqe_to_tid_req(wqe);
|
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
|
|
|
|
req->ack_seg < req->cur_seg)
|
|
|
|
hfi1_add_tid_retry_timer(qp);
|
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
while (qp->s_last != qp->s_acked) {
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
|
|
|
|
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
|
|
|
|
break;
|
2019-01-24 11:32:09 +08:00
|
|
|
trdma_clean_swqe(qp, wqe);
|
2019-06-13 20:30:52 +08:00
|
|
|
trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
|
|
|
|
rvt_qp_complete_swqe(qp,
|
2017-03-21 08:25:04 +08:00
|
|
|
wqe,
|
|
|
|
ib_hfi1_wc_opcode[wqe->wr.opcode],
|
|
|
|
IB_WC_SUCCESS);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If we were waiting for sends to complete before re-sending,
|
|
|
|
* and they are now complete, restart sending.
|
|
|
|
*/
|
2016-07-02 07:01:06 +08:00
|
|
|
trace_hfi1_sendcomplete(qp, psn);
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_WAIT_PSN &&
|
2015-07-31 03:17:43 +08:00
|
|
|
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~RVT_S_WAIT_PSN;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_sending_psn = qp->s_psn;
|
|
|
|
qp->s_sending_hpsn = qp->s_psn - 1;
|
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-20 06:42:28 +08:00
|
|
|
/* Record @psn in the QP's s_last_psn field. */
static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
	qp->s_last_psn = psn;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generate a SWQE completion.
|
|
|
|
* This is similar to hfi1_send_complete but has to check to be sure
|
|
|
|
* that the SGEs are not being referenced if the SWQE is being resent.
|
|
|
|
*/
|
2019-01-24 11:29:44 +08:00
|
|
|
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
|
|
|
|
struct rvt_swqe *wqe,
|
|
|
|
struct hfi1_ibport *ibp)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2019-01-24 22:36:48 +08:00
|
|
|
struct hfi1_qp_priv *priv = qp->priv;
|
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* Don't decrement refcount and don't generate a
|
|
|
|
* completion if the SWQE is being resent until the send
|
|
|
|
* is finished.
|
|
|
|
*/
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_rc_completion(qp, wqe->lpsn);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
|
|
|
|
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
|
2019-01-24 11:32:09 +08:00
|
|
|
trdma_clean_swqe(qp, wqe);
|
2019-06-13 20:30:52 +08:00
|
|
|
trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
|
|
|
|
rvt_qp_complete_swqe(qp,
|
2017-03-21 08:25:04 +08:00
|
|
|
wqe,
|
|
|
|
ib_hfi1_wc_opcode[wqe->wr.opcode],
|
|
|
|
IB_WC_SUCCESS);
|
2015-07-31 03:17:43 +08:00
|
|
|
} else {
|
|
|
|
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
|
|
|
|
2016-01-20 06:42:39 +08:00
|
|
|
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* If send progress not running attempt to progress
|
|
|
|
* SDMA queue.
|
|
|
|
*/
|
|
|
|
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
|
|
|
|
struct sdma_engine *engine;
|
2017-04-30 02:41:28 +08:00
|
|
|
u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
|
2015-07-31 03:17:43 +08:00
|
|
|
u8 sc5;
|
|
|
|
|
|
|
|
/* For now use sc to find engine */
|
2017-04-30 02:41:28 +08:00
|
|
|
sc5 = ibp->sl_to_sc[sl];
|
2015-07-31 03:17:43 +08:00
|
|
|
engine = qp_to_sdma_engine(qp, sc5);
|
|
|
|
sdma_engine_progress_schedule(engine);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
qp->s_retry = qp->s_retry_cnt;
|
2019-01-24 13:51:39 +08:00
|
|
|
/*
|
|
|
|
* Don't update the last PSN if the request being completed is
|
|
|
|
* a TID RDMA WRITE request.
|
|
|
|
* Completion of the TID RDMA WRITE requests are done by the
|
|
|
|
* TID RDMA ACKs and as such could be for a request that has
|
|
|
|
* already been ACKed as far as the IB state machine is
|
|
|
|
* concerned.
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
|
|
|
|
update_last_psn(qp, wqe->lpsn);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we are completing a request which is in the process of
|
|
|
|
* being resent, we can stop re-sending it since we know the
|
|
|
|
* responder has already seen it.
|
|
|
|
*/
|
|
|
|
if (qp->s_acked == qp->s_cur) {
|
|
|
|
if (++qp->s_cur >= qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
qp->s_acked = qp->s_cur;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (qp->s_acked != qp->s_tail) {
|
|
|
|
qp->s_state = OP(SEND_LAST);
|
|
|
|
qp->s_psn = wqe->psn;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (++qp->s_acked >= qp->s_size)
|
|
|
|
qp->s_acked = 0;
|
|
|
|
if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
|
|
|
|
qp->s_draining = 0;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2019-01-24 22:36:48 +08:00
|
|
|
if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
|
|
|
|
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
|
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
return wqe;
|
|
|
|
}
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
static void set_restart_qp(struct rvt_qp *qp, struct hfi1_ctxtdata *rcd)
|
|
|
|
{
|
|
|
|
/* Retry this request. */
|
|
|
|
if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
|
|
|
|
qp->r_flags |= RVT_R_RDMAR_SEQ;
|
|
|
|
hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
|
|
|
|
if (list_empty(&qp->rspwait)) {
|
|
|
|
qp->r_flags |= RVT_R_RSP_SEND;
|
|
|
|
rvt_get_qp(qp);
|
|
|
|
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* update_qp_retry_state - Update qp retry state.
|
|
|
|
* @qp: the QP
|
|
|
|
* @psn: the packet sequence number of the TID RDMA WRITE RESP.
|
|
|
|
* @spsn: The start psn for the given TID RDMA WRITE swqe.
|
|
|
|
* @lpsn: The last psn for the given TID RDMA WRITE swqe.
|
|
|
|
*
|
|
|
|
* This function is called to update the qp retry state upon
|
|
|
|
* receiving a TID WRITE RESP after the qp is scheduled to retry
|
|
|
|
* a request.
|
|
|
|
*/
|
|
|
|
static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
|
|
|
|
u32 lpsn)
|
|
|
|
{
|
|
|
|
struct hfi1_qp_priv *qpriv = qp->priv;
|
|
|
|
|
|
|
|
qp->s_psn = psn + 1;
|
|
|
|
/*
|
|
|
|
* If this is the first TID RDMA WRITE RESP packet for the current
|
|
|
|
* request, change the s_state so that the retry will be processed
|
|
|
|
* correctly. Similarly, if this is the last TID RDMA WRITE RESP
|
|
|
|
* packet, change the s_state and advance the s_cur.
|
|
|
|
*/
|
|
|
|
if (cmp_psn(psn, lpsn) >= 0) {
|
|
|
|
qp->s_cur = qpriv->s_tid_cur + 1;
|
|
|
|
if (qp->s_cur >= qp->s_size)
|
|
|
|
qp->s_cur = 0;
|
|
|
|
qp->s_state = TID_OP(WRITE_REQ);
|
|
|
|
} else if (!cmp_psn(psn, spsn)) {
|
|
|
|
qp->s_cur = qpriv->s_tid_cur;
|
|
|
|
qp->s_state = TID_OP(WRITE_RESP);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/**
|
|
|
|
* do_rc_ack - process an incoming RC ACK
|
|
|
|
* @qp: the QP the ACK came in on
|
|
|
|
* @psn: the packet sequence number of the ACK
|
|
|
|
* @opcode: the opcode of the request that resulted in the ACK
|
|
|
|
*
|
|
|
|
* This is called from rc_rcv_resp() to process an incoming RC ACK
|
|
|
|
* for the given QP.
|
2015-10-26 22:28:43 +08:00
|
|
|
* May be called at interrupt level, with the QP s_lock held.
|
2015-07-31 03:17:43 +08:00
|
|
|
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
|
|
|
|
*/
|
2019-01-24 11:29:44 +08:00
|
|
|
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
|
|
|
|
u64 val, struct hfi1_ctxtdata *rcd)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
|
|
|
struct hfi1_ibport *ibp;
|
|
|
|
enum ib_wc_status status;
|
2019-01-24 11:32:09 +08:00
|
|
|
struct hfi1_qp_priv *qpriv = qp->priv;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2015-07-31 03:17:43 +08:00
|
|
|
int ret = 0;
|
|
|
|
u32 ack_psn;
|
|
|
|
int diff;
|
2019-01-24 13:51:39 +08:00
|
|
|
struct rvt_dev_info *rdi;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
|
|
|
|
* requests and implicitly NAK RDMA read and atomic requests issued
|
|
|
|
* before the NAK'ed request. The MSN won't include the NAK'ed
|
|
|
|
* request but will include an ACK'ed request(s).
|
|
|
|
*/
|
|
|
|
ack_psn = psn;
|
2017-02-08 21:28:25 +08:00
|
|
|
if (aeth >> IB_AETH_NAK_SHIFT)
|
2015-07-31 03:17:43 +08:00
|
|
|
ack_psn--;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2017-02-08 21:26:25 +08:00
|
|
|
ibp = rcd_to_iport(rcd);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The MSN might be for a later WQE than the PSN indicates so
|
|
|
|
* only complete WQEs that the PSN finishes.
|
|
|
|
*/
|
|
|
|
while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
|
|
|
|
/*
|
|
|
|
* RDMA_READ_RESPONSE_ONLY is a special case since
|
|
|
|
* we want to generate completion events for everything
|
|
|
|
* before the RDMA read, copy the data, then generate
|
|
|
|
* the completion for the read.
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_READ &&
|
|
|
|
opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
|
|
|
|
diff == 0) {
|
|
|
|
ret = 1;
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If this request is a RDMA read or atomic, and the ACK is
|
|
|
|
* for a later operation, this ACK NAKs the RDMA read or
|
|
|
|
* atomic. In other words, only a RDMA_READ_LAST or ONLY
|
|
|
|
* can ACK a RDMA read and likewise for atomic ops. Note
|
|
|
|
* that the NAK case can only happen if relaxed ordering is
|
|
|
|
* used and requests are sent after an RDMA read or atomic
|
|
|
|
* is sent but before the response is received.
|
|
|
|
*/
|
|
|
|
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
|
|
|
|
(opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
|
2019-01-24 11:32:09 +08:00
|
|
|
(wqe->wr.opcode == IB_WR_TID_RDMA_READ &&
|
|
|
|
(opcode != TID_OP(READ_RESP) || diff != 0)) ||
|
2015-07-31 03:17:43 +08:00
|
|
|
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
|
2019-01-24 13:51:39 +08:00
|
|
|
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0)) ||
|
|
|
|
(wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
|
|
|
|
(delta_psn(psn, qp->s_last_psn) != 1))) {
|
|
|
|
set_restart_qp(qp, rcd);
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* No need to process the ACK/NAK since we are
|
|
|
|
* restarting an earlier request.
|
|
|
|
*/
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
|
|
|
|
u64 *vaddr = wqe->sg_list[0].vaddr;
|
|
|
|
*vaddr = val;
|
|
|
|
}
|
2019-01-24 11:21:11 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_OPFN)
|
|
|
|
opfn_conn_reply(qp, val);
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
if (qp->s_num_rd_atomic &&
|
|
|
|
(wqe->wr.opcode == IB_WR_RDMA_READ ||
|
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
|
|
|
|
qp->s_num_rd_atomic--;
|
|
|
|
/* Restart sending task if fence is complete */
|
2016-01-20 06:43:01 +08:00
|
|
|
if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
|
2015-07-31 03:17:43 +08:00
|
|
|
!qp->s_num_rd_atomic) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~(RVT_S_WAIT_FENCE |
|
|
|
|
RVT_S_WAIT_ACK);
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
2016-01-20 06:43:01 +08:00
|
|
|
} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
|
|
|
|
qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
|
|
|
|
RVT_S_WAIT_ACK);
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
}
|
|
|
|
}
|
2019-01-24 13:51:39 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* TID RDMA WRITE requests will be completed by the TID RDMA
|
|
|
|
* ACK packet handler (see tid_rdma.c).
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
|
|
|
|
break;
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
wqe = do_rc_completion(qp, wqe, ibp);
|
|
|
|
if (qp->s_acked == qp->s_tail)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-01-24 11:32:44 +08:00
|
|
|
trace_hfi1_rc_ack_do(qp, aeth, psn, wqe);
|
|
|
|
trace_hfi1_sender_do_rc_ack(qp);
|
2017-02-08 21:28:25 +08:00
|
|
|
switch (aeth >> IB_AETH_NAK_SHIFT) {
|
2015-07-31 03:17:43 +08:00
|
|
|
case 0: /* ACK */
|
2016-01-20 06:42:39 +08:00
|
|
|
this_cpu_inc(*ibp->rvp.rc_acks);
|
2019-01-24 11:32:09 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
|
|
|
|
if (wqe_to_tid_req(wqe)->ack_pending)
|
|
|
|
rvt_mod_retry_timer_ext(qp,
|
|
|
|
qpriv->timeout_shift);
|
|
|
|
else
|
|
|
|
rvt_stop_rc_timers(qp);
|
|
|
|
} else if (qp->s_acked != qp->s_tail) {
|
2019-01-24 13:51:39 +08:00
|
|
|
struct rvt_swqe *__w = NULL;
|
|
|
|
|
|
|
|
if (qpriv->s_tid_cur != HFI1_QP_WQE_INVALID)
|
|
|
|
__w = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
2019-01-24 13:51:39 +08:00
|
|
|
* Stop timers if we've received all of the TID RDMA
|
|
|
|
* WRITE * responses.
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
2019-01-24 13:51:39 +08:00
|
|
|
if (__w && __w->wr.opcode == IB_WR_TID_RDMA_WRITE &&
|
|
|
|
opcode == TID_OP(WRITE_RESP)) {
|
|
|
|
/*
|
|
|
|
* Normally, the loop above would correctly
|
|
|
|
* process all WQEs from s_acked onward and
|
|
|
|
* either complete them or check for correct
|
|
|
|
* PSN sequencing.
|
|
|
|
* However, for TID RDMA, due to pipelining,
|
|
|
|
* the response may not be for the request at
|
|
|
|
* s_acked so the above look would just be
|
|
|
|
* skipped. This does not allow for checking
|
|
|
|
* the PSN sequencing. It has to be done
|
|
|
|
* separately.
|
|
|
|
*/
|
|
|
|
if (cmp_psn(psn, qp->s_last_psn + 1)) {
|
|
|
|
set_restart_qp(qp, rcd);
|
|
|
|
goto bail_stop;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If the psn is being resent, stop the
|
|
|
|
* resending.
|
|
|
|
*/
|
|
|
|
if (qp->s_cur != qp->s_tail &&
|
|
|
|
cmp_psn(qp->s_psn, psn) <= 0)
|
|
|
|
update_qp_retry_state(qp, psn,
|
|
|
|
__w->psn,
|
|
|
|
__w->lpsn);
|
|
|
|
else if (--qpriv->pending_tid_w_resp)
|
|
|
|
rvt_mod_retry_timer(qp);
|
|
|
|
else
|
|
|
|
rvt_stop_rc_timers(qp);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We are expecting more ACKs so
|
|
|
|
* mod the retry timer.
|
|
|
|
*/
|
|
|
|
rvt_mod_retry_timer(qp);
|
|
|
|
/*
|
|
|
|
* We can stop re-sending the earlier packets
|
|
|
|
* and continue with the next packet the
|
|
|
|
* receiver wants.
|
|
|
|
*/
|
|
|
|
if (cmp_psn(qp->s_psn, psn) <= 0)
|
|
|
|
reset_psn(qp, psn + 1);
|
|
|
|
}
|
2016-02-05 02:59:18 +08:00
|
|
|
} else {
|
|
|
|
/* No more acks - kill all timers */
|
2017-02-08 21:27:19 +08:00
|
|
|
rvt_stop_rc_timers(qp);
|
2016-02-05 02:59:18 +08:00
|
|
|
if (cmp_psn(qp->s_psn, psn) <= 0) {
|
|
|
|
qp->s_state = OP(SEND_LAST);
|
|
|
|
qp->s_psn = psn + 1;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_WAIT_ACK) {
|
|
|
|
qp->s_flags &= ~RVT_S_WAIT_ACK;
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
}
|
2017-02-08 21:27:07 +08:00
|
|
|
rvt_get_credit(qp, aeth);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
|
|
|
|
qp->s_retry = qp->s_retry_cnt;
|
2019-01-24 13:51:39 +08:00
|
|
|
/*
|
|
|
|
* If the current request is a TID RDMA WRITE request and the
|
|
|
|
* response is not a TID RDMA WRITE RESP packet, s_last_psn
|
|
|
|
* can't be advanced.
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
|
|
|
|
opcode != TID_OP(WRITE_RESP) &&
|
|
|
|
cmp_psn(psn, wqe->psn) >= 0)
|
|
|
|
return 1;
|
2015-07-31 03:17:43 +08:00
|
|
|
update_last_psn(qp, psn);
|
2016-02-05 02:59:18 +08:00
|
|
|
return 1;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
case 1: /* RNR NAK */
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rnr_naks++;
|
2015-07-31 03:17:43 +08:00
|
|
|
if (qp->s_acked == qp->s_tail)
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_WAIT_RNR)
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2019-01-24 13:51:39 +08:00
|
|
|
rdi = ib_to_rvt(qp->ibqp.device);
|
|
|
|
if (qp->s_rnr_retry == 0 &&
|
|
|
|
!((rdi->post_parms[wqe->wr.opcode].flags &
|
|
|
|
RVT_OPERATION_IGN_RNR_CNT) &&
|
|
|
|
qp->s_rnr_retry_cnt == 0)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
status = IB_WC_RNR_RETRY_EXC_ERR;
|
|
|
|
goto class_b;
|
|
|
|
}
|
2019-01-24 13:51:39 +08:00
|
|
|
if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_rnr_retry--;
|
|
|
|
|
2019-01-24 13:51:39 +08:00
|
|
|
/*
|
|
|
|
* The last valid PSN is the previous PSN. For TID RDMA WRITE
|
|
|
|
* request, s_last_psn should be incremented only when a TID
|
|
|
|
* RDMA WRITE RESP is received to avoid skipping lost TID RDMA
|
|
|
|
* WRITE RESP packets.
|
|
|
|
*/
|
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
|
|
|
|
reset_psn(qp, qp->s_last_psn + 1);
|
|
|
|
} else {
|
|
|
|
update_last_psn(qp, psn - 1);
|
|
|
|
reset_psn(qp, psn);
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
|
2017-02-08 21:27:19 +08:00
|
|
|
rvt_stop_rc_timers(qp);
|
|
|
|
rvt_add_rnr_timer(qp, aeth);
|
2016-02-05 02:59:18 +08:00
|
|
|
return 0;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
case 3: /* NAK */
|
|
|
|
if (qp->s_acked == qp->s_tail)
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2015-07-31 03:17:43 +08:00
|
|
|
/* The last valid PSN is the previous PSN. */
|
|
|
|
update_last_psn(qp, psn - 1);
|
2017-02-08 21:28:25 +08:00
|
|
|
switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
|
|
|
|
IB_AETH_CREDIT_MASK) {
|
2015-07-31 03:17:43 +08:00
|
|
|
case 0: /* PSN sequence error */
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_seq_naks++;
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* Back up to the responder's expected PSN.
|
|
|
|
* Note that we might get a NAK in the middle of an
|
|
|
|
* RDMA READ response which terminates the RDMA
|
|
|
|
* READ.
|
|
|
|
*/
|
2017-02-08 21:27:19 +08:00
|
|
|
hfi1_restart_rc(qp, psn, 0);
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 1: /* Invalid Request */
|
|
|
|
status = IB_WC_REM_INV_REQ_ERR;
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_other_naks++;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto class_b;
|
|
|
|
|
|
|
|
case 2: /* Remote Access Error */
|
|
|
|
status = IB_WC_REM_ACCESS_ERR;
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_other_naks++;
|
2015-07-31 03:17:43 +08:00
|
|
|
goto class_b;
|
|
|
|
|
|
|
|
case 3: /* Remote Operation Error */
|
|
|
|
status = IB_WC_REM_OP_ERR;
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_other_naks++;
|
2015-07-31 03:17:43 +08:00
|
|
|
class_b:
|
|
|
|
if (qp->s_last == qp->s_acked) {
|
2019-01-24 11:32:09 +08:00
|
|
|
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
|
|
|
|
hfi1_kern_read_tid_flow_free(qp);
|
|
|
|
|
|
|
|
hfi1_trdma_send_complete(qp, wqe, status);
|
2016-01-20 06:43:44 +08:00
|
|
|
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* Ignore other reserved NAK error codes */
|
|
|
|
goto reserved;
|
|
|
|
}
|
|
|
|
qp->s_retry = qp->s_retry_cnt;
|
|
|
|
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
default: /* 2: reserved */
|
|
|
|
reserved:
|
|
|
|
/* Ignore reserved NAK codes. */
|
2016-02-05 02:59:18 +08:00
|
|
|
goto bail_stop;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
2016-04-13 02:28:56 +08:00
|
|
|
/* cannot be reached */
|
2016-02-05 02:59:18 +08:00
|
|
|
bail_stop:
|
2017-02-08 21:27:19 +08:00
|
|
|
rvt_stop_rc_timers(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have seen an out of sequence RDMA read middle or last packet.
|
|
|
|
* This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
|
|
|
|
*/
|
2016-01-20 06:42:28 +08:00
|
|
|
static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
|
2015-07-31 03:17:43 +08:00
|
|
|
struct hfi1_ctxtdata *rcd)
|
|
|
|
{
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->s_lock);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* Remove QP from retry timer */
|
2017-02-08 21:27:19 +08:00
|
|
|
rvt_stop_rc_timers(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
while (cmp_psn(psn, wqe->lpsn) > 0) {
|
|
|
|
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
|
2019-01-24 11:32:09 +08:00
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
|
2019-01-24 13:51:39 +08:00
|
|
|
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE ||
|
2015-07-31 03:17:43 +08:00
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
|
|
|
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
|
|
|
|
break;
|
|
|
|
wqe = do_rc_completion(qp, wqe, ibp);
|
|
|
|
}
|
|
|
|
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rdma_seq++;
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->r_flags |= RVT_R_RDMAR_SEQ;
|
2017-02-08 21:27:19 +08:00
|
|
|
hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (list_empty(&qp->rspwait)) {
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->r_flags |= RVT_R_RSP_SEND;
|
2016-09-06 19:34:35 +08:00
|
|
|
rvt_get_qp(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rc_rcv_resp - process an incoming RC response packet
|
2017-08-05 04:54:41 +08:00
|
|
|
* @packet: data packet information
|
2015-07-31 03:17:43 +08:00
|
|
|
*
|
|
|
|
* This is called from hfi1_rc_rcv() to process an incoming RC response
|
|
|
|
* packet for the given QP.
|
|
|
|
* Called at interrupt level.
|
|
|
|
*/
|
2017-08-05 04:54:41 +08:00
|
|
|
static void rc_rcv_resp(struct hfi1_packet *packet)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2017-08-05 04:54:41 +08:00
|
|
|
struct hfi1_ctxtdata *rcd = packet->rcd;
|
|
|
|
void *data = packet->payload;
|
|
|
|
u32 tlen = packet->tlen;
|
|
|
|
struct rvt_qp *qp = packet->qp;
|
2018-02-02 02:46:31 +08:00
|
|
|
struct hfi1_ibport *ibp;
|
2017-08-05 04:54:41 +08:00
|
|
|
struct ib_other_headers *ohdr = packet->ohdr;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_swqe *wqe;
|
2015-07-31 03:17:43 +08:00
|
|
|
enum ib_wc_status status;
|
|
|
|
unsigned long flags;
|
|
|
|
int diff;
|
|
|
|
u64 val;
|
2017-08-05 04:54:41 +08:00
|
|
|
u32 aeth;
|
|
|
|
u32 psn = ib_bth_get_psn(packet->ohdr);
|
|
|
|
u32 pmtu = qp->pmtu;
|
|
|
|
u16 hdrsize = packet->hlen;
|
|
|
|
u8 opcode = packet->opcode;
|
|
|
|
u8 pad = packet->pad;
|
|
|
|
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&qp->s_lock, flags);
|
2016-07-02 07:01:06 +08:00
|
|
|
trace_hfi1_ack(qp, psn);
|
2015-10-26 22:28:48 +08:00
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/* Ignore invalid responses. */
|
2017-02-08 21:26:08 +08:00
|
|
|
if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
|
2015-07-31 03:17:43 +08:00
|
|
|
goto ack_done;
|
|
|
|
|
|
|
|
/* Ignore duplicate responses. */
|
|
|
|
diff = cmp_psn(psn, qp->s_last_psn);
|
|
|
|
if (unlikely(diff <= 0)) {
|
|
|
|
/* Update credits for "ghost" ACKs */
|
|
|
|
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
|
|
|
|
aeth = be32_to_cpu(ohdr->u.aeth);
|
2017-02-08 21:28:25 +08:00
|
|
|
if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
|
2017-02-08 21:27:07 +08:00
|
|
|
rvt_get_credit(qp, aeth);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
goto ack_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip everything other than the PSN we expect, if we are waiting
|
|
|
|
* for a reply to a restarted RDMA read or atomic op.
|
|
|
|
*/
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->r_flags & RVT_R_RDMAR_SEQ) {
|
2015-07-31 03:17:43 +08:00
|
|
|
if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
|
|
|
|
goto ack_done;
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->r_flags &= ~RVT_R_RDMAR_SEQ;
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(qp->s_acked == qp->s_tail))
|
|
|
|
goto ack_done;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
status = IB_WC_SUCCESS;
|
|
|
|
|
|
|
|
switch (opcode) {
|
|
|
|
case OP(ACKNOWLEDGE):
|
|
|
|
case OP(ATOMIC_ACKNOWLEDGE):
|
|
|
|
case OP(RDMA_READ_RESPONSE_FIRST):
|
|
|
|
aeth = be32_to_cpu(ohdr->u.aeth);
|
2016-09-06 19:35:05 +08:00
|
|
|
if (opcode == OP(ATOMIC_ACKNOWLEDGE))
|
|
|
|
val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
|
|
|
|
else
|
2015-07-31 03:17:43 +08:00
|
|
|
val = 0;
|
|
|
|
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
|
|
|
|
opcode != OP(RDMA_READ_RESPONSE_FIRST))
|
|
|
|
goto ack_done;
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
|
|
|
|
goto ack_op_err;
|
|
|
|
/*
|
|
|
|
* If this is a response to a resent RDMA read, we
|
|
|
|
* have to be careful to copy the data to the right
|
|
|
|
* location.
|
|
|
|
*/
|
|
|
|
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
|
|
|
|
wqe, psn, pmtu);
|
|
|
|
goto read_middle;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_MIDDLE):
|
|
|
|
/* no AETH, no ACK */
|
|
|
|
if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
|
|
|
|
goto ack_seq_err;
|
|
|
|
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
|
|
|
|
goto ack_op_err;
|
|
|
|
read_middle:
|
2017-08-05 04:54:41 +08:00
|
|
|
if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto ack_len_err;
|
|
|
|
if (unlikely(pmtu >= qp->s_rdma_read_len))
|
|
|
|
goto ack_len_err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We got a response so update the timeout.
|
|
|
|
* 4.096 usec. * (1 << qp->timeout)
|
|
|
|
*/
|
2017-02-08 21:27:19 +08:00
|
|
|
rvt_mod_retry_timer(qp);
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->s_flags & RVT_S_WAIT_ACK) {
|
|
|
|
qp->s_flags &= ~RVT_S_WAIT_ACK;
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
|
|
|
|
qp->s_retry = qp->s_retry_cnt;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the RDMA receive state but do the copy w/o
|
|
|
|
* holding the locks and blocking interrupts.
|
|
|
|
*/
|
|
|
|
qp->s_rdma_read_len -= pmtu;
|
|
|
|
update_last_psn(qp, psn);
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
2018-09-27 01:44:33 +08:00
|
|
|
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
|
|
|
|
data, pmtu, false, false);
|
2015-07-31 03:17:43 +08:00
|
|
|
goto bail;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_ONLY):
|
|
|
|
aeth = be32_to_cpu(ohdr->u.aeth);
|
|
|
|
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
|
|
|
|
goto ack_done;
|
|
|
|
/*
|
|
|
|
* Check that the data size is >= 0 && <= pmtu.
|
|
|
|
* Remember to account for ICRC (4).
|
|
|
|
*/
|
2017-08-05 04:54:41 +08:00
|
|
|
if (unlikely(tlen < (hdrsize + extra_bytes)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto ack_len_err;
|
|
|
|
/*
|
|
|
|
* If this is a response to a resent RDMA read, we
|
|
|
|
* have to be careful to copy the data to the right
|
|
|
|
* location.
|
|
|
|
*/
|
2016-01-20 06:43:33 +08:00
|
|
|
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
|
|
|
|
wqe, psn, pmtu);
|
|
|
|
goto read_last;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_RESPONSE_LAST):
|
|
|
|
/* ACKs READ req. */
|
|
|
|
if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
|
|
|
|
goto ack_seq_err;
|
|
|
|
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
|
|
|
|
goto ack_op_err;
|
|
|
|
/*
|
|
|
|
* Check that the data size is >= 1 && <= pmtu.
|
|
|
|
* Remember to account for ICRC (4).
|
|
|
|
*/
|
2017-08-05 04:54:41 +08:00
|
|
|
if (unlikely(tlen <= (hdrsize + extra_bytes)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto ack_len_err;
|
|
|
|
read_last:
|
2017-08-05 04:54:41 +08:00
|
|
|
tlen -= hdrsize + extra_bytes;
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(tlen != qp->s_rdma_read_len))
|
|
|
|
goto ack_len_err;
|
|
|
|
aeth = be32_to_cpu(ohdr->u.aeth);
|
2018-09-27 01:44:33 +08:00
|
|
|
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
|
|
|
|
data, tlen, false, false);
|
2015-07-31 03:17:43 +08:00
|
|
|
WARN_ON(qp->s_rdma_read_sge.num_sge);
|
2016-02-15 12:19:41 +08:00
|
|
|
(void)do_rc_ack(qp, aeth, psn,
|
2015-07-31 03:17:43 +08:00
|
|
|
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
|
|
|
|
goto ack_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
ack_op_err:
|
|
|
|
status = IB_WC_LOC_QP_OP_ERR;
|
|
|
|
goto ack_err;
|
|
|
|
|
|
|
|
ack_seq_err:
|
2018-02-02 02:46:31 +08:00
|
|
|
ibp = rcd_to_iport(rcd);
|
2015-07-31 03:17:43 +08:00
|
|
|
rdma_seq_err(qp, ibp, psn, rcd);
|
|
|
|
goto ack_done;
|
|
|
|
|
|
|
|
ack_len_err:
|
|
|
|
status = IB_WC_LOC_LEN_ERR;
|
|
|
|
ack_err:
|
|
|
|
if (qp->s_last == qp->s_acked) {
|
2018-09-27 01:44:42 +08:00
|
|
|
rvt_send_complete(qp, wqe, status);
|
2016-01-20 06:43:44 +08:00
|
|
|
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
ack_done:
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
bail:
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-01-20 06:42:28 +08:00
|
|
|
static inline void rc_cancel_ack(struct rvt_qp *qp)
|
2015-11-10 22:14:01 +08:00
|
|
|
{
|
2017-05-04 20:14:04 +08:00
|
|
|
qp->r_adefered = 0;
|
2015-11-10 22:14:01 +08:00
|
|
|
if (list_empty(&qp->rspwait))
|
|
|
|
return;
|
|
|
|
list_del_init(&qp->rspwait);
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->r_flags &= ~RVT_R_RSP_NAK;
|
2016-09-06 19:34:35 +08:00
|
|
|
rvt_put_qp(qp);
|
2015-11-10 22:14:01 +08:00
|
|
|
}
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
/**
|
|
|
|
* rc_rcv_error - process an incoming duplicate or error RC packet
|
|
|
|
* @ohdr: the other headers for this packet
|
|
|
|
* @data: the packet data
|
|
|
|
* @qp: the QP for this packet
|
|
|
|
* @opcode: the opcode for this packet
|
|
|
|
* @psn: the packet sequence number for this packet
|
|
|
|
* @diff: the difference between the PSN and the expected PSN
|
|
|
|
*
|
|
|
|
* This is called from hfi1_rc_rcv() to process an unexpected
|
|
|
|
* incoming RC packet for the given QP.
|
|
|
|
* Called at interrupt level.
|
|
|
|
* Return 1 if no more processing is needed; otherwise return 0 to
|
|
|
|
* schedule a response to be sent.
|
|
|
|
*/
|
2016-09-06 19:35:05 +08:00
|
|
|
static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
|
2016-02-15 12:21:52 +08:00
|
|
|
struct rvt_qp *qp, u32 opcode, u32 psn,
|
|
|
|
int diff, struct hfi1_ctxtdata *rcd)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2017-02-08 21:26:25 +08:00
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_ack_entry *e;
|
2015-07-31 03:17:43 +08:00
|
|
|
unsigned long flags;
|
2019-01-24 11:29:44 +08:00
|
|
|
u8 prev;
|
|
|
|
u8 mra; /* most recent ACK */
|
|
|
|
bool old_req;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-07-02 07:01:06 +08:00
|
|
|
trace_hfi1_rcv_error(qp, psn);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (diff > 0) {
|
|
|
|
/*
|
|
|
|
* Packet sequence error.
|
|
|
|
* A NAK will ACK earlier sends and RDMA writes.
|
|
|
|
* Don't queue the NAK if we already sent one.
|
|
|
|
*/
|
|
|
|
if (!qp->r_nak_state) {
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_seqnak++;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = IB_NAK_PSN_ERROR;
|
|
|
|
/* Use the expected PSN. */
|
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
/*
|
|
|
|
* Wait to send the sequence NAK until all packets
|
|
|
|
* in the receive queue have been processed.
|
|
|
|
* Otherwise, we end up propagating congestion.
|
|
|
|
*/
|
2015-11-10 22:13:55 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handle a duplicate request. Don't re-execute SEND, RDMA
|
|
|
|
* write or atomic op. Don't NAK errors, just silently drop
|
|
|
|
* the duplicate request. Note that r_sge, r_len, and
|
|
|
|
* r_rcv_len may be in use so don't modify them.
|
|
|
|
*
|
|
|
|
* We are supposed to ACK the earliest duplicate PSN but we
|
|
|
|
* can coalesce an outstanding duplicate ACK. We have to
|
|
|
|
* send the earliest so that RDMA reads can be restarted at
|
|
|
|
* the requester's expected PSN.
|
|
|
|
*
|
|
|
|
* First, find where this duplicate PSN falls within the
|
|
|
|
* ACKs previously sent.
|
|
|
|
* old_req is true if there is an older response that is scheduled
|
|
|
|
* to be sent before sending this one.
|
|
|
|
*/
|
|
|
|
e = NULL;
|
|
|
|
old_req = 1;
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_dupreq++;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&qp->s_lock, flags);
|
|
|
|
|
2019-01-24 11:29:44 +08:00
|
|
|
e = find_prev_entry(qp, psn, &prev, &mra, &old_req);
|
|
|
|
|
2015-07-31 03:17:43 +08:00
|
|
|
switch (opcode) {
|
|
|
|
case OP(RDMA_READ_REQUEST): {
|
|
|
|
struct ib_reth *reth;
|
|
|
|
u32 offset;
|
|
|
|
u32 len;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we didn't find the RDMA read request in the ack queue,
|
|
|
|
* we can ignore this request.
|
|
|
|
*/
|
|
|
|
if (!e || e->opcode != OP(RDMA_READ_REQUEST))
|
|
|
|
goto unlock_done;
|
|
|
|
/* RETH comes after BTH */
|
|
|
|
reth = &ohdr->u.rc.reth;
|
|
|
|
/*
|
|
|
|
* Address range must be a subset of the original
|
|
|
|
* request and start on pmtu boundaries.
|
|
|
|
* We reuse the old ack_queue slot since the requester
|
|
|
|
* should not back up and request an earlier PSN for the
|
|
|
|
* same request.
|
|
|
|
*/
|
|
|
|
offset = delta_psn(psn, e->psn) * qp->pmtu;
|
|
|
|
len = be32_to_cpu(reth->length);
|
|
|
|
if (unlikely(offset + len != e->rdma_sge.sge_length))
|
|
|
|
goto unlock_done;
|
2019-03-19 00:58:30 +08:00
|
|
|
release_rdma_sge_mr(e);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (len != 0) {
|
|
|
|
u32 rkey = be32_to_cpu(reth->rkey);
|
2016-09-06 19:35:05 +08:00
|
|
|
u64 vaddr = get_ib_reth_vaddr(reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
int ok;
|
|
|
|
|
2016-01-20 06:42:28 +08:00
|
|
|
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
|
|
|
|
IB_ACCESS_REMOTE_READ);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(!ok))
|
|
|
|
goto unlock_done;
|
|
|
|
} else {
|
|
|
|
e->rdma_sge.vaddr = NULL;
|
|
|
|
e->rdma_sge.length = 0;
|
|
|
|
e->rdma_sge.sge_length = 0;
|
|
|
|
}
|
|
|
|
e->psn = psn;
|
|
|
|
if (old_req)
|
|
|
|
goto unlock_done;
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
|
|
|
|
qp->s_acked_ack_queue = prev;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_tail_ack_queue = prev;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case OP(COMPARE_SWAP):
|
|
|
|
case OP(FETCH_ADD): {
|
|
|
|
/*
|
|
|
|
* If we didn't find the atomic request in the ack queue
|
2016-09-25 22:42:08 +08:00
|
|
|
* or the send engine is already backed up to send an
|
2015-07-31 03:17:43 +08:00
|
|
|
* earlier entry, we can ignore this request.
|
|
|
|
*/
|
2016-02-15 12:19:41 +08:00
|
|
|
if (!e || e->opcode != (u8)opcode || old_req)
|
2015-07-31 03:17:43 +08:00
|
|
|
goto unlock_done;
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
|
|
|
|
qp->s_acked_ack_queue = prev;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->s_tail_ack_queue = prev;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* Ignore this operation if it doesn't request an ACK
|
|
|
|
* or an earlier RDMA read or atomic is going to be resent.
|
|
|
|
*/
|
|
|
|
if (!(psn & IB_BTH_REQ_ACK) || old_req)
|
|
|
|
goto unlock_done;
|
|
|
|
/*
|
|
|
|
* Resend the most recent ACK if this request is
|
|
|
|
* after all the previous RDMA reads and atomics.
|
|
|
|
*/
|
2019-01-24 11:29:44 +08:00
|
|
|
if (mra == qp->r_head_ack_queue) {
|
2015-07-31 03:17:43 +08:00
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
qp->r_nak_state = 0;
|
|
|
|
qp->r_ack_psn = qp->r_psn - 1;
|
|
|
|
goto send_ack;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Resend the RDMA read or atomic op which
|
|
|
|
* ACKs this duplicate request.
|
|
|
|
*/
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
|
|
|
|
qp->s_acked_ack_queue = mra;
|
2019-01-24 11:29:44 +08:00
|
|
|
qp->s_tail_ack_queue = mra;
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
qp->s_ack_state = OP(ACKNOWLEDGE);
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_RESP_PENDING;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = 0;
|
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
|
|
|
|
unlock_done:
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
done:
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
send_ack:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * log_cca_event - record one congestion event in the port's event log
 * @ppd: port whose congestion log is updated
 * @sl: service level the event was seen on
 * @rlid: remote LID stored in the log entry
 * @lqpn: local QP number (masked with RVT_QPN_MASK before storing)
 * @rqpn: remote QP number (masked with RVT_QPN_MASK before storing)
 * @svc_type: service type stored in the log entry
 *
 * Events for an out-of-range @sl are silently dropped.  Everything else
 * is done while holding ppd->cc_log_lock with interrupts saved/disabled.
 */
static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
			  u32 lqpn, u32 rqpn, u8 svc_type)
{
	struct opa_hfi1_cong_log_event_internal *entry;
	unsigned long irq_flags;

	if (sl >= OPA_MAX_SLS)
		return;

	spin_lock_irqsave(&ppd->cc_log_lock, irq_flags);

	/* Flag this SL in the per-port bitmap: one bit per SL, 8 per byte. */
	ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
	ppd->threshold_event_counter++;

	/* Claim the current slot, then advance the index with wrap-around. */
	entry = &ppd->cc_events[ppd->cc_log_idx];
	ppd->cc_log_idx++;
	if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
		ppd->cc_log_idx = 0;

	entry->sl = sl;
	entry->svc_type = svc_type;
	entry->rlid = rlid;
	entry->lqpn = lqpn & RVT_QPN_MASK;
	entry->rqpn = rqpn & RVT_QPN_MASK;
	/* keep timestamp in units of 1.024 usec */
	entry->timestamp = ktime_get_ns() / 1024;

	spin_unlock_irqrestore(&ppd->cc_log_lock, irq_flags);
}
|
|
|
|
|
2017-08-05 04:54:41 +08:00
|
|
|
/*
 * process_becn - react to a BECN for a given service level
 * @ppd: port the notification applies to
 * @sl: service level; values >= OPA_MAX_SLS are ignored
 * @rlid: remote LID, forwarded to log_cca_event()
 * @lqpn: local QP number, forwarded to log_cca_event()
 * @rqpn: remote QP number, forwarded to log_cca_event()
 * @svc_type: service type, forwarded to log_cca_event()
 *
 * Steps performed:
 *   1) increase CCTI (for this SL), saturating at the table limit
 *   2) select IPG (i.e., call set_link_ipg()) when CCTI changed
 *   3) start the per-SL timer if it is not already active
 * Finally, if a non-zero trigger threshold is configured and the
 * resulting CCTI has reached it, the event is logged.
 */
void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
		  u32 rqpn, u8 svc_type)
{
	struct cc_state *cc_state;
	struct cca_timer *timer;
	unsigned long irq_flags;
	u16 limit, incr, timer_units, cur_ccti;
	u8 threshold;

	if (sl >= OPA_MAX_SLS)
		return;

	cc_state = get_cc_state(ppd);
	if (!cc_state)
		return;

	/* Snapshot the congestion settings for this SL before locking. */
	limit = cc_state->cct.ccti_limit;
	incr = cc_state->cong_setting.entries[sl].ccti_increase;
	timer_units = cc_state->cong_setting.entries[sl].ccti_timer;
	threshold = cc_state->cong_setting.entries[sl].trigger_threshold;

	spin_lock_irqsave(&ppd->cca_timer_lock, irq_flags);

	timer = &ppd->cca_timer[sl];
	if (timer->ccti < limit) {
		/* Bump the index but never let it exceed the limit. */
		if (timer->ccti + incr > limit)
			timer->ccti = limit;
		else
			timer->ccti += incr;
		set_link_ipg(ppd);
	}

	cur_ccti = timer->ccti;

	if (!hrtimer_active(&timer->hrtimer)) {
		/* ccti_timer is in units of 1.024 usec */
		unsigned long timeout_ns = 1024 * timer_units;

		hrtimer_start(&timer->hrtimer, ns_to_ktime(timeout_ns),
			      HRTIMER_MODE_REL_PINNED);
	}

	spin_unlock_irqrestore(&ppd->cca_timer_lock, irq_flags);

	/* Logging takes its own lock, so do it after dropping ours. */
	if (threshold && cur_ccti >= threshold)
		log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* hfi1_rc_rcv - process an incoming RC packet
|
2017-08-05 04:54:41 +08:00
|
|
|
* @packet: data packet information
|
2015-07-31 03:17:43 +08:00
|
|
|
*
|
|
|
|
* This is called from qp_rcv() to process an incoming RC packet
|
|
|
|
* for the given QP.
|
2015-10-26 22:28:43 +08:00
|
|
|
* May be called at interrupt level.
|
2015-07-31 03:17:43 +08:00
|
|
|
*/
|
|
|
|
void hfi1_rc_rcv(struct hfi1_packet *packet)
|
|
|
|
{
|
|
|
|
struct hfi1_ctxtdata *rcd = packet->rcd;
|
2017-08-05 04:53:58 +08:00
|
|
|
void *data = packet->payload;
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 tlen = packet->tlen;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_qp *qp = packet->qp;
|
2019-01-24 13:48:59 +08:00
|
|
|
struct hfi1_qp_priv *qpriv = qp->priv;
|
2017-02-08 21:26:25 +08:00
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
|
2016-09-06 19:35:05 +08:00
|
|
|
struct ib_other_headers *ohdr = packet->ohdr;
|
2017-05-13 00:20:20 +08:00
|
|
|
u32 opcode = packet->opcode;
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 hdrsize = packet->hlen;
|
2017-08-05 04:54:41 +08:00
|
|
|
u32 psn = ib_bth_get_psn(packet->ohdr);
|
2017-05-13 00:20:20 +08:00
|
|
|
u32 pad = packet->pad;
|
2015-07-31 03:17:43 +08:00
|
|
|
struct ib_wc wc;
|
|
|
|
u32 pmtu = qp->pmtu;
|
|
|
|
int diff;
|
|
|
|
struct ib_reth *reth;
|
|
|
|
unsigned long flags;
|
2017-04-10 01:17:30 +08:00
|
|
|
int ret;
|
2018-11-29 02:19:15 +08:00
|
|
|
bool copy_last = false, fecn;
|
2016-07-26 04:38:31 +08:00
|
|
|
u32 rkey;
|
2017-08-05 04:54:41 +08:00
|
|
|
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-06 19:37:41 +08:00
|
|
|
lockdep_assert_held(&qp->r_lock);
|
2017-05-13 00:20:20 +08:00
|
|
|
|
|
|
|
if (hfi1_ruc_check_hdr(ibp, packet))
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
2018-11-29 02:19:15 +08:00
|
|
|
fecn = process_ecn(qp, packet);
|
2019-01-24 11:21:11 +08:00
|
|
|
opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Process responses (ACKs) before anything else. Note that the
|
|
|
|
* packet sequence number will be for something in the send work
|
|
|
|
* queue rather than the expected receive packet sequence number.
|
|
|
|
* In other words, this QP is the requester.
|
|
|
|
*/
|
|
|
|
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
|
|
|
|
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
|
2017-08-05 04:54:41 +08:00
|
|
|
rc_rcv_resp(packet);
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Compute 24 bits worth of difference. */
|
|
|
|
diff = delta_psn(psn, qp->r_psn);
|
|
|
|
if (unlikely(diff)) {
|
|
|
|
if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
|
|
|
|
return;
|
|
|
|
goto send_ack;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check for opcode sequence errors. */
|
|
|
|
switch (qp->r_state) {
|
|
|
|
case OP(SEND_FIRST):
|
|
|
|
case OP(SEND_MIDDLE):
|
|
|
|
if (opcode == OP(SEND_MIDDLE) ||
|
|
|
|
opcode == OP(SEND_LAST) ||
|
2016-07-26 04:38:31 +08:00
|
|
|
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
|
|
|
|
opcode == OP(SEND_LAST_WITH_INVALIDATE))
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
|
|
|
goto nack_inv;
|
|
|
|
|
|
|
|
case OP(RDMA_WRITE_FIRST):
|
|
|
|
case OP(RDMA_WRITE_MIDDLE):
|
|
|
|
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
|
|
|
|
opcode == OP(RDMA_WRITE_LAST) ||
|
|
|
|
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
|
|
|
|
break;
|
|
|
|
goto nack_inv;
|
|
|
|
|
|
|
|
default:
|
|
|
|
if (opcode == OP(SEND_MIDDLE) ||
|
|
|
|
opcode == OP(SEND_LAST) ||
|
|
|
|
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
|
2016-07-26 04:38:31 +08:00
|
|
|
opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
|
2015-07-31 03:17:43 +08:00
|
|
|
opcode == OP(RDMA_WRITE_MIDDLE) ||
|
|
|
|
opcode == OP(RDMA_WRITE_LAST) ||
|
|
|
|
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
|
|
|
|
goto nack_inv;
|
|
|
|
/*
|
|
|
|
* Note that it is up to the requester to not send a new
|
|
|
|
* RDMA read or atomic operation before receiving an ACK
|
|
|
|
* for the previous operation.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-01-20 06:43:01 +08:00
|
|
|
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
|
2017-02-08 21:27:01 +08:00
|
|
|
rvt_comm_est(qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* OK, process the packet. */
|
|
|
|
switch (opcode) {
|
|
|
|
case OP(SEND_FIRST):
|
2018-05-02 21:44:03 +08:00
|
|
|
ret = rvt_get_rwqe(qp, false);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto nack_op_err;
|
|
|
|
if (!ret)
|
|
|
|
goto rnr_nak;
|
|
|
|
qp->r_rcv_len = 0;
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case OP(SEND_MIDDLE):
|
|
|
|
case OP(RDMA_WRITE_MIDDLE):
|
|
|
|
send_middle:
|
|
|
|
/* Check for invalid length PMTU or posted rwqe len. */
|
2017-08-05 04:54:41 +08:00
|
|
|
/*
|
|
|
|
* There will be no padding for 9B packet but 16B packets
|
|
|
|
* will come in with some padding since we always add
|
|
|
|
* CRC and LT bytes which will need to be flit aligned
|
|
|
|
*/
|
|
|
|
if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto nack_inv;
|
|
|
|
qp->r_rcv_len += pmtu;
|
|
|
|
if (unlikely(qp->r_rcv_len > qp->r_len))
|
|
|
|
goto nack_inv;
|
2018-09-27 01:44:33 +08:00
|
|
|
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
|
|
|
|
/* consume RWQE */
|
2018-05-02 21:44:03 +08:00
|
|
|
ret = rvt_get_rwqe(qp, true);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto nack_op_err;
|
|
|
|
if (!ret)
|
|
|
|
goto rnr_nak;
|
|
|
|
goto send_last_imm;
|
|
|
|
|
|
|
|
case OP(SEND_ONLY):
|
|
|
|
case OP(SEND_ONLY_WITH_IMMEDIATE):
|
2016-07-26 04:38:31 +08:00
|
|
|
case OP(SEND_ONLY_WITH_INVALIDATE):
|
2018-05-02 21:44:03 +08:00
|
|
|
ret = rvt_get_rwqe(qp, false);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto nack_op_err;
|
|
|
|
if (!ret)
|
|
|
|
goto rnr_nak;
|
|
|
|
qp->r_rcv_len = 0;
|
|
|
|
if (opcode == OP(SEND_ONLY))
|
|
|
|
goto no_immediate_data;
|
2016-07-26 04:38:31 +08:00
|
|
|
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
|
|
|
|
goto send_last_inv;
|
2017-10-12 01:48:57 +08:00
|
|
|
/* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
|
2015-07-31 03:17:43 +08:00
|
|
|
case OP(SEND_LAST_WITH_IMMEDIATE):
|
|
|
|
send_last_imm:
|
|
|
|
wc.ex.imm_data = ohdr->u.imm_data;
|
|
|
|
wc.wc_flags = IB_WC_WITH_IMM;
|
|
|
|
goto send_last;
|
2016-07-26 04:38:31 +08:00
|
|
|
case OP(SEND_LAST_WITH_INVALIDATE):
|
|
|
|
send_last_inv:
|
|
|
|
rkey = be32_to_cpu(ohdr->u.ieth);
|
|
|
|
if (rvt_invalidate_rkey(qp, rkey))
|
|
|
|
goto no_immediate_data;
|
|
|
|
wc.ex.invalidate_rkey = rkey;
|
|
|
|
wc.wc_flags = IB_WC_WITH_INVALIDATE;
|
|
|
|
goto send_last;
|
2015-07-31 03:17:43 +08:00
|
|
|
case OP(RDMA_WRITE_LAST):
|
2017-02-08 21:27:31 +08:00
|
|
|
copy_last = rvt_is_user_qp(qp);
|
2016-02-04 06:35:49 +08:00
|
|
|
/* fall through */
|
|
|
|
case OP(SEND_LAST):
|
2015-07-31 03:17:43 +08:00
|
|
|
no_immediate_data:
|
|
|
|
wc.wc_flags = 0;
|
|
|
|
wc.ex.imm_data = 0;
|
|
|
|
send_last:
|
|
|
|
/* Check for invalid length. */
|
|
|
|
/* LAST len should be >= 1 */
|
2017-08-05 04:54:41 +08:00
|
|
|
if (unlikely(tlen < (hdrsize + extra_bytes)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto nack_inv;
|
2017-08-05 04:54:41 +08:00
|
|
|
/* Don't count the CRC(and padding and LT byte for 16B). */
|
|
|
|
tlen -= (hdrsize + extra_bytes);
|
2015-07-31 03:17:43 +08:00
|
|
|
wc.byte_len = tlen + qp->r_rcv_len;
|
|
|
|
if (unlikely(wc.byte_len > qp->r_len))
|
|
|
|
goto nack_inv;
|
2018-09-27 01:44:33 +08:00
|
|
|
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
|
2016-01-20 06:43:44 +08:00
|
|
|
rvt_put_ss(&qp->r_sge);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_msn++;
|
2016-12-08 11:32:28 +08:00
|
|
|
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
|
|
|
wc.wr_id = qp->r_wr_id;
|
|
|
|
wc.status = IB_WC_SUCCESS;
|
|
|
|
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
|
|
|
|
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
|
|
|
|
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
|
|
|
|
else
|
|
|
|
wc.opcode = IB_WC_RECV;
|
|
|
|
wc.qp = &qp->ibqp;
|
|
|
|
wc.src_qp = qp->remote_qpn;
|
2017-11-06 22:39:22 +08:00
|
|
|
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
|
2015-07-31 03:17:43 +08:00
|
|
|
/*
|
|
|
|
* It seems that IB mandates the presence of an SL in a
|
|
|
|
* work completion only for the UD transport (see section
|
|
|
|
* 11.4.2 of IBTA Vol. 1).
|
|
|
|
*
|
|
|
|
* However, the way the SL is chosen below is consistent
|
|
|
|
* with the way that IB/qib works and is trying to avoid
|
|
|
|
* introducing incompatibilities.
|
|
|
|
*
|
|
|
|
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
|
|
|
|
*/
|
2017-04-30 02:41:28 +08:00
|
|
|
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
|
2015-07-31 03:17:43 +08:00
|
|
|
/* zero fields that are N/A */
|
|
|
|
wc.vendor_err = 0;
|
|
|
|
wc.pkey_index = 0;
|
|
|
|
wc.dlid_path_bits = 0;
|
|
|
|
wc.port_num = 0;
|
|
|
|
/* Signal completion event if the solicited bit is set. */
|
2019-06-29 02:21:52 +08:00
|
|
|
rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
|
2015-07-31 03:17:43 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case OP(RDMA_WRITE_ONLY):
|
2017-02-08 21:27:31 +08:00
|
|
|
copy_last = rvt_is_user_qp(qp);
|
2016-02-04 06:35:49 +08:00
|
|
|
/* fall through */
|
|
|
|
case OP(RDMA_WRITE_FIRST):
|
2015-07-31 03:17:43 +08:00
|
|
|
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
|
|
|
|
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
|
|
|
|
goto nack_inv;
|
|
|
|
/* consume RWQE */
|
|
|
|
reth = &ohdr->u.rc.reth;
|
|
|
|
qp->r_len = be32_to_cpu(reth->length);
|
|
|
|
qp->r_rcv_len = 0;
|
|
|
|
qp->r_sge.sg_list = NULL;
|
|
|
|
if (qp->r_len != 0) {
|
|
|
|
u32 rkey = be32_to_cpu(reth->rkey);
|
2016-09-06 19:35:05 +08:00
|
|
|
u64 vaddr = get_ib_reth_vaddr(reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
int ok;
|
|
|
|
|
|
|
|
/* Check rkey & NAK */
|
2016-01-20 06:42:28 +08:00
|
|
|
ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
|
|
|
|
rkey, IB_ACCESS_REMOTE_WRITE);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(!ok))
|
|
|
|
goto nack_acc;
|
|
|
|
qp->r_sge.num_sge = 1;
|
|
|
|
} else {
|
|
|
|
qp->r_sge.num_sge = 0;
|
|
|
|
qp->r_sge.sge.mr = NULL;
|
|
|
|
qp->r_sge.sge.vaddr = NULL;
|
|
|
|
qp->r_sge.sge.length = 0;
|
|
|
|
qp->r_sge.sge.sge_length = 0;
|
|
|
|
}
|
|
|
|
if (opcode == OP(RDMA_WRITE_FIRST))
|
|
|
|
goto send_middle;
|
|
|
|
else if (opcode == OP(RDMA_WRITE_ONLY))
|
|
|
|
goto no_immediate_data;
|
2018-05-02 21:44:03 +08:00
|
|
|
ret = rvt_get_rwqe(qp, true);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto nack_op_err;
|
2017-05-13 00:02:00 +08:00
|
|
|
if (!ret) {
|
|
|
|
/* peer will send again */
|
|
|
|
rvt_put_ss(&qp->r_sge);
|
2015-07-31 03:17:43 +08:00
|
|
|
goto rnr_nak;
|
2017-05-13 00:02:00 +08:00
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
wc.ex.imm_data = ohdr->u.rc.imm_data;
|
|
|
|
wc.wc_flags = IB_WC_WITH_IMM;
|
|
|
|
goto send_last;
|
|
|
|
|
|
|
|
case OP(RDMA_READ_REQUEST): {
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_ack_entry *e;
|
2015-07-31 03:17:43 +08:00
|
|
|
u32 len;
|
|
|
|
u8 next;
|
|
|
|
|
|
|
|
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
|
|
|
|
goto nack_inv;
|
|
|
|
next = qp->r_head_ack_queue + 1;
|
2019-01-24 11:21:01 +08:00
|
|
|
/* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
|
|
|
|
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
|
2015-07-31 03:17:43 +08:00
|
|
|
next = 0;
|
|
|
|
spin_lock_irqsave(&qp->s_lock, flags);
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (unlikely(next == qp->s_acked_ack_queue)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
if (!qp->s_ack_queue[next].sent)
|
|
|
|
goto nack_inv_unlck;
|
|
|
|
update_ack_queue(qp, next);
|
|
|
|
}
|
|
|
|
e = &qp->s_ack_queue[qp->r_head_ack_queue];
|
2019-03-19 00:58:30 +08:00
|
|
|
release_rdma_sge_mr(e);
|
2015-07-31 03:17:43 +08:00
|
|
|
reth = &ohdr->u.rc.reth;
|
|
|
|
len = be32_to_cpu(reth->length);
|
|
|
|
if (len) {
|
|
|
|
u32 rkey = be32_to_cpu(reth->rkey);
|
2016-09-06 19:35:05 +08:00
|
|
|
u64 vaddr = get_ib_reth_vaddr(reth);
|
2015-07-31 03:17:43 +08:00
|
|
|
int ok;
|
|
|
|
|
|
|
|
/* Check rkey & NAK */
|
2016-01-20 06:42:28 +08:00
|
|
|
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
|
|
|
|
rkey, IB_ACCESS_REMOTE_READ);
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(!ok))
|
|
|
|
goto nack_acc_unlck;
|
|
|
|
/*
|
|
|
|
* Update the next expected PSN. We add 1 later
|
|
|
|
* below, so only add the remainder here.
|
|
|
|
*/
|
2016-12-08 11:34:37 +08:00
|
|
|
qp->r_psn += rvt_div_mtu(qp, len - 1);
|
2015-07-31 03:17:43 +08:00
|
|
|
} else {
|
|
|
|
e->rdma_sge.mr = NULL;
|
|
|
|
e->rdma_sge.vaddr = NULL;
|
|
|
|
e->rdma_sge.length = 0;
|
|
|
|
e->rdma_sge.sge_length = 0;
|
|
|
|
}
|
|
|
|
e->opcode = opcode;
|
|
|
|
e->sent = 0;
|
|
|
|
e->psn = psn;
|
|
|
|
e->lpsn = qp->r_psn;
|
|
|
|
/*
|
|
|
|
* We need to increment the MSN here instead of when we
|
|
|
|
* finish sending the result since a duplicate request would
|
|
|
|
* increment it more than once.
|
|
|
|
*/
|
|
|
|
qp->r_msn++;
|
|
|
|
qp->r_psn++;
|
|
|
|
qp->r_state = opcode;
|
|
|
|
qp->r_nak_state = 0;
|
|
|
|
qp->r_head_ack_queue = next;
|
2019-01-24 13:48:59 +08:00
|
|
|
qpriv->r_tid_alloc = qp->r_head_ack_queue;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-25 22:42:08 +08:00
|
|
|
/* Schedule the send engine. */
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_RESP_PENDING;
|
2018-11-29 02:19:15 +08:00
|
|
|
if (fecn)
|
|
|
|
qp->s_flags |= RVT_S_ECN;
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
case OP(COMPARE_SWAP):
|
|
|
|
case OP(FETCH_ADD): {
|
2019-01-24 11:21:11 +08:00
|
|
|
struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
|
|
|
|
u64 vaddr = get_ib_ateth_vaddr(ateth);
|
|
|
|
bool opfn = opcode == OP(COMPARE_SWAP) &&
|
|
|
|
vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_ack_entry *e;
|
2015-07-31 03:17:43 +08:00
|
|
|
atomic64_t *maddr;
|
|
|
|
u64 sdata;
|
|
|
|
u32 rkey;
|
|
|
|
u8 next;
|
|
|
|
|
2019-01-24 11:21:11 +08:00
|
|
|
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
|
|
|
|
!opfn))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto nack_inv;
|
|
|
|
next = qp->r_head_ack_queue + 1;
|
2019-01-24 11:21:01 +08:00
|
|
|
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
|
2015-07-31 03:17:43 +08:00
|
|
|
next = 0;
|
|
|
|
spin_lock_irqsave(&qp->s_lock, flags);
|
IB/hfi1: Add an s_acked_ack_queue pointer
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.
In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.
The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.
When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.
What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.
The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.
This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE requests, it may fall behind
s_tail_ack_queue.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2019-01-24 13:48:48 +08:00
|
|
|
if (unlikely(next == qp->s_acked_ack_queue)) {
|
2015-07-31 03:17:43 +08:00
|
|
|
if (!qp->s_ack_queue[next].sent)
|
|
|
|
goto nack_inv_unlck;
|
|
|
|
update_ack_queue(qp, next);
|
|
|
|
}
|
|
|
|
e = &qp->s_ack_queue[qp->r_head_ack_queue];
|
2019-03-19 00:58:30 +08:00
|
|
|
release_rdma_sge_mr(e);
|
2019-01-24 11:21:11 +08:00
|
|
|
/* Process OPFN special virtual address */
|
|
|
|
if (opfn) {
|
|
|
|
opfn_conn_response(qp, e, ateth);
|
|
|
|
goto ack;
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
if (unlikely(vaddr & (sizeof(u64) - 1)))
|
|
|
|
goto nack_inv_unlck;
|
|
|
|
rkey = be32_to_cpu(ateth->rkey);
|
|
|
|
/* Check rkey & NAK */
|
2016-01-20 06:42:28 +08:00
|
|
|
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
|
|
|
|
vaddr, rkey,
|
|
|
|
IB_ACCESS_REMOTE_ATOMIC)))
|
2015-07-31 03:17:43 +08:00
|
|
|
goto nack_acc_unlck;
|
|
|
|
/* Perform atomic OP and save result. */
|
2016-02-15 12:19:41 +08:00
|
|
|
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
|
2016-09-06 19:35:05 +08:00
|
|
|
sdata = get_ib_ateth_swap(ateth);
|
2015-07-31 03:17:43 +08:00
|
|
|
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
|
2016-02-15 12:19:41 +08:00
|
|
|
(u64)atomic64_add_return(sdata, maddr) - sdata :
|
|
|
|
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
|
2016-09-06 19:35:05 +08:00
|
|
|
get_ib_ateth_compare(ateth),
|
2015-07-31 03:17:43 +08:00
|
|
|
sdata);
|
2016-01-20 06:42:28 +08:00
|
|
|
rvt_put_mr(qp->r_sge.sge.mr);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_sge.num_sge = 0;
|
2019-01-24 11:21:11 +08:00
|
|
|
ack:
|
2015-07-31 03:17:43 +08:00
|
|
|
e->opcode = opcode;
|
|
|
|
e->sent = 0;
|
|
|
|
e->psn = psn;
|
|
|
|
e->lpsn = psn;
|
|
|
|
qp->r_msn++;
|
|
|
|
qp->r_psn++;
|
|
|
|
qp->r_state = opcode;
|
|
|
|
qp->r_nak_state = 0;
|
|
|
|
qp->r_head_ack_queue = next;
|
2019-01-24 13:48:59 +08:00
|
|
|
qpriv->r_tid_alloc = qp->r_head_ack_queue;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2016-09-25 22:42:08 +08:00
|
|
|
/* Schedule the send engine. */
|
2016-01-20 06:43:01 +08:00
|
|
|
qp->s_flags |= RVT_S_RESP_PENDING;
|
2018-11-29 02:19:15 +08:00
|
|
|
if (fecn)
|
|
|
|
qp->s_flags |= RVT_S_ECN;
|
2015-07-31 03:17:43 +08:00
|
|
|
hfi1_schedule_send(qp);
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* NAK unknown opcodes. */
|
|
|
|
goto nack_inv;
|
|
|
|
}
|
|
|
|
qp->r_psn++;
|
|
|
|
qp->r_state = opcode;
|
|
|
|
qp->r_ack_psn = psn;
|
|
|
|
qp->r_nak_state = 0;
|
|
|
|
/* Send an ACK if requested or required. */
|
2018-11-29 02:19:15 +08:00
|
|
|
if (psn & IB_BTH_REQ_ACK || fecn) {
|
|
|
|
if (packet->numpkt == 0 || fecn ||
|
|
|
|
qp->r_adefered >= HFI1_PSN_CREDIT) {
|
2015-11-10 22:14:01 +08:00
|
|
|
rc_cancel_ack(qp);
|
|
|
|
goto send_ack;
|
|
|
|
}
|
2017-05-04 20:14:04 +08:00
|
|
|
qp->r_adefered++;
|
2015-11-10 22:14:01 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
|
|
|
}
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
rnr_nak:
|
2016-03-06 00:49:29 +08:00
|
|
|
qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
/* Queue RNR NAK for later */
|
2015-11-10 22:13:55 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
nack_op_err:
|
2017-02-08 21:27:01 +08:00
|
|
|
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
|
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
/* Queue NAK for later */
|
2015-11-10 22:13:55 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
nack_inv_unlck:
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
nack_inv:
|
2017-02-08 21:27:01 +08:00
|
|
|
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
|
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
/* Queue NAK for later */
|
2015-11-10 22:13:55 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
nack_acc_unlck:
|
|
|
|
spin_unlock_irqrestore(&qp->s_lock, flags);
|
|
|
|
nack_acc:
|
2017-02-08 21:27:01 +08:00
|
|
|
rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
|
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
send_ack:
|
2018-11-29 02:19:15 +08:00
|
|
|
hfi1_send_rc_ack(packet, fecn);
|
2015-07-31 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void hfi1_rc_hdrerr(
|
|
|
|
struct hfi1_ctxtdata *rcd,
|
2017-05-13 00:20:20 +08:00
|
|
|
struct hfi1_packet *packet,
|
2016-01-20 06:42:28 +08:00
|
|
|
struct rvt_qp *qp)
|
2015-07-31 03:17:43 +08:00
|
|
|
{
|
2017-02-08 21:26:25 +08:00
|
|
|
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
|
2015-07-31 03:17:43 +08:00
|
|
|
int diff;
|
2015-09-20 22:07:15 +08:00
|
|
|
u32 opcode;
|
2017-05-13 00:20:20 +08:00
|
|
|
u32 psn;
|
2015-07-31 03:17:43 +08:00
|
|
|
|
2017-05-13 00:20:20 +08:00
|
|
|
if (hfi1_ruc_check_hdr(ibp, packet))
|
2015-07-31 03:17:43 +08:00
|
|
|
return;
|
|
|
|
|
2017-05-13 00:20:20 +08:00
|
|
|
psn = ib_bth_get_psn(packet->ohdr);
|
|
|
|
opcode = ib_bth_get_opcode(packet->ohdr);
|
2015-07-31 03:17:43 +08:00
|
|
|
|
|
|
|
/* Only deal with RDMA Writes for now */
|
|
|
|
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
|
|
|
|
diff = delta_psn(psn, qp->r_psn);
|
|
|
|
if (!qp->r_nak_state && diff >= 0) {
|
2016-01-20 06:42:39 +08:00
|
|
|
ibp->rvp.n_rc_seqnak++;
|
2015-07-31 03:17:43 +08:00
|
|
|
qp->r_nak_state = IB_NAK_PSN_ERROR;
|
|
|
|
/* Use the expected PSN. */
|
|
|
|
qp->r_ack_psn = qp->r_psn;
|
|
|
|
/*
|
|
|
|
* Wait to send the sequence
|
|
|
|
* NAK until all packets
|
|
|
|
* in the receive queue have
|
|
|
|
* been processed.
|
|
|
|
* Otherwise, we end up
|
|
|
|
* propagating congestion.
|
|
|
|
*/
|
2015-11-10 22:13:55 +08:00
|
|
|
rc_defered_ack(rcd, qp);
|
2015-07-31 03:17:43 +08:00
|
|
|
} /* Out of sequence NAK */
|
|
|
|
} /* QP Request NAKs */
|
|
|
|
}
|