OpenCloudOS-Kernel/drivers/scsi/qedf/qedf_els.c

1047 lines
27 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* QLogic FCoE Offload Driver
* Copyright (c) 2016-2018 Cavium Inc.
*/
#include "qedf.h"
/*
 * qedf_initiate_els - build and post a middle-path ELS request on an rport.
 *
 * @fcport:     remote port to send on; must have QEDF_RPORT_SESSION_READY set
 * @op:         ELS opcode; must lie in [ELS_LS_RJT, ELS_AUTH_ELS]
 * @data:       ELS payload, copied into the request's req_buf
 * @data_len:   payload length in bytes
 * @cb_func:    completion callback stored on the request
 * @cb_arg:     callback argument; its io_req and op fields are filled in here
 * @timer_msec: if non-zero, passed to qedf_cmd_timer_set() for this request
 *
 * Returns 0 once the doorbell has been rung, or a negative errno:
 * -EINVAL (NULL fcport or cb_arg, session not ready, opcode out of range),
 * -EAGAIN (rport or link not ready), -ENOMEM (qedf_alloc_cmd() failed) or
 * the error returned by qedf_init_mp_req().  cb_arg is never freed here; on
 * failure the request obtained from qedf_alloc_cmd() is released again and
 * no longer points at cb_arg.
 *
 * NOTE(review): the previous comment said "the lock is held" by the caller
 * without naming the lock.  fcport->rport_lock is acquired below, so it
 * cannot be that one - confirm which lock was meant.
 * NOTE(review): data_len is not checked against the size of req_buf in this
 * function; callers are presumed to pass a payload that fits - TODO confirm.
 */
static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
	void *data, uint32_t data_len,
	void (*cb_func)(struct qedf_els_cb_arg *cb_arg),
	struct qedf_els_cb_arg *cb_arg, uint32_t timer_msec)
{
	struct qedf_ctx *qedf;
	struct fc_lport *lport;
	struct qedf_ioreq *els_req;
	struct qedf_mp_req *mp_req;
	struct fc_frame_header *fc_hdr;
	struct e4_fcoe_task_context *task;
	int rc;
	uint32_t did, sid;
	uint16_t xid;
	struct fcoe_wqe *sqe;
	unsigned long flags;
	u16 sqe_idx;

	if (!fcport) {
		QEDF_ERR(NULL, "fcport is NULL\n");
		return -EINVAL;
	}

	qedf = fcport->qedf;
	lport = qedf->lport;

	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending ELS\n");

	/* cb_arg is written to below, so it must be valid */
	if (!cb_arg) {
		QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: cb_arg is NULL\n", op);
		return -EINVAL;
	}

	rc = fc_remote_port_chkready(fcport->rport);
	if (rc) {
		QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: rport not ready\n", op);
		return -EAGAIN;
	}

	if (lport->state != LPORT_ST_READY || !(lport->link_up)) {
		QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: link is not ready\n",
			 op);
		return -EAGAIN;
	}

	if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
		QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: fcport not ready\n", op);
		return -EINVAL;
	}

	/* Reject out-of-range opcodes before a request is consumed */
	if (op < ELS_LS_RJT || op > ELS_AUTH_ELS) {
		QEDF_ERR(&(qedf->dbg_ctx), "Invalid ELS op 0x%x\n", op);
		return -EINVAL;
	}

	/* Fail immediately if no request is available; there is no retry */
	els_req = qedf_alloc_cmd(fcport, QEDF_ELS);
	if (!els_req) {
		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
			  "Failed to alloc ELS request 0x%x\n", op);
		return -ENOMEM;
	}

	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = "
		  "0x%p cb_arg = %p xid = %x\n", els_req, cb_arg,
		  els_req->xid);

	els_req->sc_cmd = NULL;
	els_req->cmd_type = QEDF_ELS;
	els_req->fcport = fcport;
	els_req->cb_func = cb_func;
	cb_arg->io_req = els_req;
	cb_arg->op = op;
	els_req->cb_arg = cb_arg;
	els_req->data_xfer_len = data_len;

	/* Record which cpu this request is associated with */
	els_req->cpu = smp_processor_id();

	mp_req = &(els_req->mp_req);
	rc = qedf_init_mp_req(els_req);
	if (rc) {
		QEDF_ERR(&(qedf->dbg_ctx), "ELS MP request init failed\n");
		/*
		 * cb_arg stays with the caller on failure, so do not leave
		 * the released request pointing at it.
		 */
		els_req->cb_func = NULL;
		els_req->cb_arg = NULL;
		kref_put(&els_req->refcount, qedf_release_cmd);
		return rc;
	}

	/* Fill ELS Payload */
	memcpy(mp_req->req_buf, data, data_len);

	/* Fill FC header */
	fc_hdr = &(mp_req->req_fc_hdr);

	did = fcport->rdata->ids.port_id;
	sid = fcport->sid;

	__fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, did, sid,
			 FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ |
			 FC_FC_SEQ_INIT, 0);

	/* Obtain exchange id */
	xid = els_req->xid;

	/* The SQ entry is claimed, filled and posted under rport_lock */
	spin_lock_irqsave(&fcport->rport_lock, flags);

	sqe_idx = qedf_get_sqe_idx(fcport);
	sqe = &fcport->sq[sqe_idx];
	memset(sqe, 0, sizeof(struct fcoe_wqe));

	/* Initialize task context for this IO request */
	task = qedf_get_task_mem(&qedf->tasks, xid);
	qedf_init_mp_task(els_req, task, sqe);

	/* Put timer on original I/O request */
	if (timer_msec)
		qedf_cmd_timer_set(qedf, els_req, timer_msec);

	/* Ring doorbell */
	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Ringing doorbell for ELS "
		  "req\n");
	qedf_ring_doorbell(fcport);

	set_bit(QEDF_CMD_OUTSTANDING, &els_req->flags);

	spin_unlock_irqrestore(&fcport->rport_lock, flags);

	return 0;
}
void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
struct qedf_ioreq *els_req)
{
struct fcoe_cqe_midpath_info *mp_info;
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered with xid = 0x%x"
" cmd_type = %d.\n", els_req->xid, els_req->cmd_type);
clear_bit(QEDF_CMD_OUTSTANDING, &els_req->flags);
/* Kill the ELS timer */
cancel_delayed_work(&els_req->timeout_work);
/* Get ELS response length from CQE */
mp_info = &cqe->cqe_info.midpath_info;
els_req->mp_req.resp_len = mp_info->data_placement_size;
/* Parse ELS response */
if ((els_req->cb_func) && (els_req->cb_arg)) {
els_req->cb_func(els_req->cb_arg);
els_req->cb_arg = NULL;
}
kref_put(&els_req->refcount, qedf_release_cmd);
}
/*
 * Completion callback for an RRQ ELS request.
 *
 * cb_arg->io_req is the RRQ request itself and cb_arg->aborted_io_req is the
 * original request the RRQ was sent for.  cb_arg is always freed before
 * returning.
 */
static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg)
{
	struct qedf_ioreq *rrq_req = cb_arg->io_req;
	struct qedf_ctx *qedf = rrq_req->fcport->qedf;
	struct qedf_ioreq *orig_io_req;
	int refs;

	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered.\n");

	orig_io_req = cb_arg->aborted_io_req;
	if (orig_io_req) {
		/*
		 * The original request's timeout work is cancelled (and
		 * waited for) unless the RRQ itself ended in a timeout or a
		 * detected error.
		 */
		if (!(rrq_req->event == QEDF_IOREQ_EV_ELS_TMO ||
		      rrq_req->event == QEDF_IOREQ_EV_ELS_ERR_DETECT))
			cancel_delayed_work_sync(&orig_io_req->timeout_work);

		refs = kref_read(&orig_io_req->refcount);
		QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "rrq_compl: orig io = %p,"
			  " orig xid = 0x%x, rrq_xid = 0x%x, refcount=%d\n",
			  orig_io_req, orig_io_req->xid, rrq_req->xid, refs);

		/*
		 * Dropping this reference should hand the aborted io_req
		 * back to the command pool.  The refcount is checked first
		 * because the original request may already have been flushed
		 * by the time a completion for this xid shows up.
		 */
		if (refs > 0)
			kref_put(&orig_io_req->refcount, qedf_release_cmd);
	} else {
		QEDF_ERR(&qedf->dbg_ctx,
			 "Original io_req is NULL, rrq_req = %p.\n", rrq_req);
	}

	/*
	 * On a timeout this handler is invoked straight from the timeout
	 * handler rather than from els_compl, where the RRQ request's
	 * reference would normally have been dropped, so drop it here.
	 */
	if (rrq_req->event == QEDF_IOREQ_EV_ELS_TMO)
		kref_put(&rrq_req->refcount, qedf_release_cmd);

	kfree(cb_arg);
}
/*
 * Send an RRQ ELS for an aborted I/O request.
 *
 * Assumes the caller already holds a kref on aborted_io_req.  That
 * reference is dropped here when aborted_io_req->fcport is NULL and when
 * allocating the callback argument or initiating the ELS fails; the other
 * early -EINVAL returns (session not ready, fcport->qedf NULL, refcount not
 * equal to 1) leave it untouched.
 *
 * Returns 0 when qedf_initiate_els() accepted the RRQ, -EINVAL when a sanity
 * check fails, -ENOMEM when the callback argument cannot be allocated, or
 * the error reported by qedf_initiate_els().
 */
int qedf_send_rrq(struct qedf_ioreq *aborted_io_req)
{
	struct qedf_els_cb_arg *cb_arg = NULL;
	struct fc_els_rrq rrq;
	struct qedf_rport *fcport;
	struct qedf_ctx *qedf;
	uint32_t sid;
	uint32_t r_a_tov;
	int refs;
	int rc;

	if (!aborted_io_req) {
		QEDF_ERR(NULL, "abort_io_req is NULL.\n");
		return -EINVAL;
	}

	fcport = aborted_io_req->fcport;
	if (!fcport) {
		refs = kref_read(&aborted_io_req->refcount);
		QEDF_ERR(NULL,
			 "RRQ work was queued prior to a flush xid=0x%x, refcount=%d.\n",
			 aborted_io_req->xid, refs);
		kref_put(&aborted_io_req->refcount, qedf_release_cmd);
		return -EINVAL;
	}

	/* The RRQ can only go out while the fcport is still offloaded */
	if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
		QEDF_ERR(NULL, "fcport is no longer offloaded.\n");
		return -EINVAL;
	}

	qedf = fcport->qedf;
	if (!qedf) {
		QEDF_ERR(NULL, "fcport->qedf is NULL.\n");
		return -EINVAL;
	}

	/*
	 * Sanity check: only proceed when exactly one reference is held on
	 * the aborted request.
	 */
	refs = kref_read(&aborted_io_req->refcount);
	if (refs != 1) {
		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
			  "refcount for xid=%x io_req=%p refcount=%d is not 1.\n",
			  aborted_io_req->xid, aborted_io_req, refs);
		return -EINVAL;
	}

	sid = fcport->sid;
	r_a_tov = qedf->lport->r_a_tov;

	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending RRQ orig "
		  "io = %p, orig_xid = 0x%x\n", aborted_io_req,
		  aborted_io_req->xid);
	memset(&rrq, 0, sizeof(rrq));

	cb_arg = kzalloc(sizeof(*cb_arg), GFP_NOIO);
	if (cb_arg) {
		cb_arg->aborted_io_req = aborted_io_req;

		/* Build the RRQ payload from the aborted exchange's ids */
		rrq.rrq_cmd = ELS_RRQ;
		hton24(rrq.rrq_s_id, sid);
		rrq.rrq_ox_id = htons(aborted_io_req->xid);
		rrq.rrq_rx_id =
		    htons(aborted_io_req->task->tstorm_st_context.read_write.rx_id);

		rc = qedf_initiate_els(fcport, ELS_RRQ, &rrq, sizeof(rrq),
				       qedf_rrq_compl, cb_arg, r_a_tov);
	} else {
		QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for "
			 "RRQ\n");
		rc = -ENOMEM;
	}

	if (!rc)
		return 0;

	/* Nothing was sent: free the argument and release the original I/O */
	QEDF_ERR(&(qedf->dbg_ctx), "RRQ failed - release orig io "
		 "req 0x%x\n", aborted_io_req->xid);
	kfree(cb_arg);
	kref_put(&aborted_io_req->refcount, qedf_release_cmd);

	return rc;
}
/*
 * Rewrite the header of @fp so it reads as an ELS reply from the remote
 * port to the local port, set the frame attributes (including the CRC) and
 * pass the frame to libfc through fc_exch_recv().
 *
 * @l2_oxid replaces the OX_ID in the header unless it is FC_XID_UNKNOWN.
 */
static void qedf_process_l2_frame_compl(struct qedf_rport *fcport,
	struct fc_frame *fp,
	u16 l2_oxid)
{
	struct fc_lport *lport = fcport->qedf->lport;
	struct fc_frame_header *hdr;
	u32 frame_crc;

	hdr = (struct fc_frame_header *)fc_frame_header_get(fp);

	/* Setup header fields */
	hdr->fh_r_ctl = FC_RCTL_ELS_REP;
	hdr->fh_type = FC_TYPE_ELS;
	hdr->fh_rx_id = 0xffff;
	/* Last sequence, end sequence */
	hdr->fh_f_ctl[0] = 0x98;

	/* Addressed to the local port, sourced from the remote port */
	hton24(hdr->fh_d_id, lport->port_id);
	hton24(hdr->fh_s_id, fcport->rdata->ids.port_id);

	/* Report the OXID that libfc originally used, when it is known */
	if (l2_oxid != FC_XID_UNKNOWN)
		hdr->fh_ox_id = htons(l2_oxid);

	/* Set frame attributes; the CRC is taken before fc_frame_init() */
	frame_crc = fcoe_fc_crc(fp);
	fc_frame_init(fp);
	fr_dev(fp) = lport;
	fr_sof(fp) = FC_SOF_I3;
	fr_eof(fp) = FC_EOF_T;
	fr_crc(fp) = cpu_to_le32(~frame_crc);

	/* Send completed request to libfc */
	fc_exch_recv(lport, fp);
}
/*
* In instances where an ELS command times out we may need to restart the
* rport by logging out and then logging back in.
*/
void qedf_restart_rport(struct qedf_rport *fcport)
{
struct fc_lport *lport;
struct fc_rport_priv *rdata;
u32 port_id;
unsigned long flags;
if (!fcport) {
QEDF_ERR(NULL, "fcport is NULL.\n");
return;
}
spin_lock_irqsave(&fcport->rport_lock, flags);
scsi: qedf: Synchronize rport restarts when multiple ELS commands time out If multiple ELS commands time out, such as aborts, they could all try to restart the same rport and the same time. This could mean multiple multiple processes trying to clean up any outstanding commands or trying to upload the same port. Add a new flag (QEDF_RPORT_IN_RESET) and check other fcport state flags before trying to reset the port. Fixes the crash: [17501.824701] ------------[ cut here ]------------ [17501.824733] kernel BUG at include/asm-generic/dma-mapping-common.h:65! [17501.824760] invalid opcode: 0000 [#1] SMP [17501.824781] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ses enclosure dm_service_time vfat fat sb_edac edac_core intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass joydev btrfs hpilo raid6_pq iTCO_wdt iTCO_vendor_support xor hpwdt ipmi_ssif sg crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ioatdma lpc_ich glue_helper ablk_helper i2c_i801 shpchp cryptd ipmi_si pcspkr acpi_power_meter ipmi_devintf pcc_cpufreq dca wmi ipmi_msghandler dm_multipath nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod [17501.825119] crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qedf(OE) drm libfcoe ahci qedi(OE) crct10dif_pclmul libfc libahci uio crct10dif_common crc32c_intel libiscsi libata scsi_transport_iscsi scsi_transport_fc tg3 qede(OE) scsi_tgt hpsa qed(OE) i2c_core ptp scsi_transport_sas pps_core iscsi_boot_sysfs dm_mirror dm_region_hash dm_log dm_mod [17501.825292] CPU: 8 PID: 10531 Comm: kworker/u96:1 Tainted: G OE ------------ 3.10.0-693.el7.x86_64 #1 [17501.825330] Hardware name: HP ProLiant DL380 Gen9/ProLiant 
DL380 Gen9, BIOS P89 06/02/2016 [17501.825372] Workqueue: fc_rport_eq fc_rport_work [libfc] [17501.825395] task: ffff88101bca8000 ti: ffff881025278000 task.ti: ffff881025278000 [17501.825424] RIP: 0010:[<ffffffffc042def9>] [<ffffffffc042def9>] qedf_unmap_sg_list.isra.15+0x89/0x90 [qedf] [17501.825471] RSP: 0018:ffff88102527bb98 EFLAGS: 00010212 [17501.825493] RAX: ffff8800224eac00 RBX: ffffc9000cd05210 RCX: 0000000000001000 [17501.825520] RDX: 000000007e655e40 RSI: 0000000000001000 RDI: ffff88107fe3b098 [17501.826683] RBP: ffff88102527bba0 R08: ffffffff81a13200 R09: 0000000000000286 [17501.827747] R10: 0000000000000004 R11: 0000000000000005 R12: ffffc9000cd051b8 [17501.828804] R13: ffff881037640c28 R14: 0000000000000007 R15: ffffc9000cd05200 [17501.829850] FS: 0000000000000000(0000) GS:ffff88103fa00000(0000) knlGS:0000000000000000 [17501.830910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17501.831966] CR2: 00007f9b94005f38 CR3: 00000000019f2000 CR4: 00000000003407e0 [17501.833027] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17501.834087] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17501.835142] Stack: [17501.836201] ffff881033ddbb80 ffff88102527bc30 ffffffffc042f834 0000000000002710 [17501.837264] ffff88102527bbd0 ffffffff8133d9dd ffffc9000cd052a0 ffff88102527bc30 [17501.838325] ffffffff816a9c65 0000000000000001 ffff88101bca8000 ffffffff810c4810 [17501.839388] Call Trace: [17501.840446] [<ffffffffc042f834>] qedf_scsi_done+0x54/0x1d0 [qedf] [17501.841504] [<ffffffff8133d9dd>] ? list_del+0xd/0x30 [17501.842537] [<ffffffff816a9c65>] ? wait_for_completion_timeout+0x125/0x140 [17501.843560] [<ffffffff810c4810>] ? 
wake_up_state+0x20/0x20 [17501.844577] [<ffffffffc0430311>] qedf_initiate_cleanup+0x2e1/0x310 [qedf] [17501.845587] [<ffffffffc04305fe>] qedf_flush_active_ios+0x10e/0x260 [qedf] [17501.846612] [<ffffffffc042892f>] qedf_cleanup_fcport+0x5f/0x370 [qedf] [17501.847613] [<ffffffffc04292d8>] qedf_rport_event_handler+0x398/0x950 [qedf] [17501.848602] [<ffffffff810cdc7c>] ? dequeue_entity+0x11c/0x5d0 [17501.849581] [<ffffffff81098a2b>] ? __internal_add_timer+0xab/0x130 [17501.850555] [<ffffffff810ce54e>] ? dequeue_task_fair+0x41e/0x660 [17501.851528] [<ffffffffc03241a4>] fc_rport_work+0xf4/0x6c0 [libfc] [17501.852490] [<ffffffff810a881a>] process_one_work+0x17a/0x440 [17501.853446] [<ffffffff810a94e6>] worker_thread+0x126/0x3c0 Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-04-25 21:08:45 +08:00
if (test_bit(QEDF_RPORT_IN_RESET, &fcport->flags) ||
!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags) ||
test_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags)) {
QEDF_ERR(&(fcport->qedf->dbg_ctx), "fcport %p already in reset or not offloaded.\n",
fcport);
spin_unlock_irqrestore(&fcport->rport_lock, flags);
scsi: qedf: Synchronize rport restarts when multiple ELS commands time out If multiple ELS commands time out, such as aborts, they could all try to restart the same rport and the same time. This could mean multiple multiple processes trying to clean up any outstanding commands or trying to upload the same port. Add a new flag (QEDF_RPORT_IN_RESET) and check other fcport state flags before trying to reset the port. Fixes the crash: [17501.824701] ------------[ cut here ]------------ [17501.824733] kernel BUG at include/asm-generic/dma-mapping-common.h:65! [17501.824760] invalid opcode: 0000 [#1] SMP [17501.824781] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ses enclosure dm_service_time vfat fat sb_edac edac_core intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass joydev btrfs hpilo raid6_pq iTCO_wdt iTCO_vendor_support xor hpwdt ipmi_ssif sg crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ioatdma lpc_ich glue_helper ablk_helper i2c_i801 shpchp cryptd ipmi_si pcspkr acpi_power_meter ipmi_devintf pcc_cpufreq dca wmi ipmi_msghandler dm_multipath nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod [17501.825119] crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qedf(OE) drm libfcoe ahci qedi(OE) crct10dif_pclmul libfc libahci uio crct10dif_common crc32c_intel libiscsi libata scsi_transport_iscsi scsi_transport_fc tg3 qede(OE) scsi_tgt hpsa qed(OE) i2c_core ptp scsi_transport_sas pps_core iscsi_boot_sysfs dm_mirror dm_region_hash dm_log dm_mod [17501.825292] CPU: 8 PID: 10531 Comm: kworker/u96:1 Tainted: G OE ------------ 3.10.0-693.el7.x86_64 #1 [17501.825330] Hardware name: HP ProLiant DL380 Gen9/ProLiant 
DL380 Gen9, BIOS P89 06/02/2016 [17501.825372] Workqueue: fc_rport_eq fc_rport_work [libfc] [17501.825395] task: ffff88101bca8000 ti: ffff881025278000 task.ti: ffff881025278000 [17501.825424] RIP: 0010:[<ffffffffc042def9>] [<ffffffffc042def9>] qedf_unmap_sg_list.isra.15+0x89/0x90 [qedf] [17501.825471] RSP: 0018:ffff88102527bb98 EFLAGS: 00010212 [17501.825493] RAX: ffff8800224eac00 RBX: ffffc9000cd05210 RCX: 0000000000001000 [17501.825520] RDX: 000000007e655e40 RSI: 0000000000001000 RDI: ffff88107fe3b098 [17501.826683] RBP: ffff88102527bba0 R08: ffffffff81a13200 R09: 0000000000000286 [17501.827747] R10: 0000000000000004 R11: 0000000000000005 R12: ffffc9000cd051b8 [17501.828804] R13: ffff881037640c28 R14: 0000000000000007 R15: ffffc9000cd05200 [17501.829850] FS: 0000000000000000(0000) GS:ffff88103fa00000(0000) knlGS:0000000000000000 [17501.830910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17501.831966] CR2: 00007f9b94005f38 CR3: 00000000019f2000 CR4: 00000000003407e0 [17501.833027] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17501.834087] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17501.835142] Stack: [17501.836201] ffff881033ddbb80 ffff88102527bc30 ffffffffc042f834 0000000000002710 [17501.837264] ffff88102527bbd0 ffffffff8133d9dd ffffc9000cd052a0 ffff88102527bc30 [17501.838325] ffffffff816a9c65 0000000000000001 ffff88101bca8000 ffffffff810c4810 [17501.839388] Call Trace: [17501.840446] [<ffffffffc042f834>] qedf_scsi_done+0x54/0x1d0 [qedf] [17501.841504] [<ffffffff8133d9dd>] ? list_del+0xd/0x30 [17501.842537] [<ffffffff816a9c65>] ? wait_for_completion_timeout+0x125/0x140 [17501.843560] [<ffffffff810c4810>] ? 
wake_up_state+0x20/0x20 [17501.844577] [<ffffffffc0430311>] qedf_initiate_cleanup+0x2e1/0x310 [qedf] [17501.845587] [<ffffffffc04305fe>] qedf_flush_active_ios+0x10e/0x260 [qedf] [17501.846612] [<ffffffffc042892f>] qedf_cleanup_fcport+0x5f/0x370 [qedf] [17501.847613] [<ffffffffc04292d8>] qedf_rport_event_handler+0x398/0x950 [qedf] [17501.848602] [<ffffffff810cdc7c>] ? dequeue_entity+0x11c/0x5d0 [17501.849581] [<ffffffff81098a2b>] ? __internal_add_timer+0xab/0x130 [17501.850555] [<ffffffff810ce54e>] ? dequeue_task_fair+0x41e/0x660 [17501.851528] [<ffffffffc03241a4>] fc_rport_work+0xf4/0x6c0 [libfc] [17501.852490] [<ffffffff810a881a>] process_one_work+0x17a/0x440 [17501.853446] [<ffffffff810a94e6>] worker_thread+0x126/0x3c0 Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-04-25 21:08:45 +08:00
return;
}
/* Set that we are now in reset */
set_bit(QEDF_RPORT_IN_RESET, &fcport->flags);
spin_unlock_irqrestore(&fcport->rport_lock, flags);
scsi: qedf: Synchronize rport restarts when multiple ELS commands time out If multiple ELS commands time out, such as aborts, they could all try to restart the same rport and the same time. This could mean multiple multiple processes trying to clean up any outstanding commands or trying to upload the same port. Add a new flag (QEDF_RPORT_IN_RESET) and check other fcport state flags before trying to reset the port. Fixes the crash: [17501.824701] ------------[ cut here ]------------ [17501.824733] kernel BUG at include/asm-generic/dma-mapping-common.h:65! [17501.824760] invalid opcode: 0000 [#1] SMP [17501.824781] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ses enclosure dm_service_time vfat fat sb_edac edac_core intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass joydev btrfs hpilo raid6_pq iTCO_wdt iTCO_vendor_support xor hpwdt ipmi_ssif sg crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ioatdma lpc_ich glue_helper ablk_helper i2c_i801 shpchp cryptd ipmi_si pcspkr acpi_power_meter ipmi_devintf pcc_cpufreq dca wmi ipmi_msghandler dm_multipath nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod [17501.825119] crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qedf(OE) drm libfcoe ahci qedi(OE) crct10dif_pclmul libfc libahci uio crct10dif_common crc32c_intel libiscsi libata scsi_transport_iscsi scsi_transport_fc tg3 qede(OE) scsi_tgt hpsa qed(OE) i2c_core ptp scsi_transport_sas pps_core iscsi_boot_sysfs dm_mirror dm_region_hash dm_log dm_mod [17501.825292] CPU: 8 PID: 10531 Comm: kworker/u96:1 Tainted: G OE ------------ 3.10.0-693.el7.x86_64 #1 [17501.825330] Hardware name: HP ProLiant DL380 Gen9/ProLiant 
DL380 Gen9, BIOS P89 06/02/2016 [17501.825372] Workqueue: fc_rport_eq fc_rport_work [libfc] [17501.825395] task: ffff88101bca8000 ti: ffff881025278000 task.ti: ffff881025278000 [17501.825424] RIP: 0010:[<ffffffffc042def9>] [<ffffffffc042def9>] qedf_unmap_sg_list.isra.15+0x89/0x90 [qedf] [17501.825471] RSP: 0018:ffff88102527bb98 EFLAGS: 00010212 [17501.825493] RAX: ffff8800224eac00 RBX: ffffc9000cd05210 RCX: 0000000000001000 [17501.825520] RDX: 000000007e655e40 RSI: 0000000000001000 RDI: ffff88107fe3b098 [17501.826683] RBP: ffff88102527bba0 R08: ffffffff81a13200 R09: 0000000000000286 [17501.827747] R10: 0000000000000004 R11: 0000000000000005 R12: ffffc9000cd051b8 [17501.828804] R13: ffff881037640c28 R14: 0000000000000007 R15: ffffc9000cd05200 [17501.829850] FS: 0000000000000000(0000) GS:ffff88103fa00000(0000) knlGS:0000000000000000 [17501.830910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17501.831966] CR2: 00007f9b94005f38 CR3: 00000000019f2000 CR4: 00000000003407e0 [17501.833027] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17501.834087] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17501.835142] Stack: [17501.836201] ffff881033ddbb80 ffff88102527bc30 ffffffffc042f834 0000000000002710 [17501.837264] ffff88102527bbd0 ffffffff8133d9dd ffffc9000cd052a0 ffff88102527bc30 [17501.838325] ffffffff816a9c65 0000000000000001 ffff88101bca8000 ffffffff810c4810 [17501.839388] Call Trace: [17501.840446] [<ffffffffc042f834>] qedf_scsi_done+0x54/0x1d0 [qedf] [17501.841504] [<ffffffff8133d9dd>] ? list_del+0xd/0x30 [17501.842537] [<ffffffff816a9c65>] ? wait_for_completion_timeout+0x125/0x140 [17501.843560] [<ffffffff810c4810>] ? 
wake_up_state+0x20/0x20 [17501.844577] [<ffffffffc0430311>] qedf_initiate_cleanup+0x2e1/0x310 [qedf] [17501.845587] [<ffffffffc04305fe>] qedf_flush_active_ios+0x10e/0x260 [qedf] [17501.846612] [<ffffffffc042892f>] qedf_cleanup_fcport+0x5f/0x370 [qedf] [17501.847613] [<ffffffffc04292d8>] qedf_rport_event_handler+0x398/0x950 [qedf] [17501.848602] [<ffffffff810cdc7c>] ? dequeue_entity+0x11c/0x5d0 [17501.849581] [<ffffffff81098a2b>] ? __internal_add_timer+0xab/0x130 [17501.850555] [<ffffffff810ce54e>] ? dequeue_task_fair+0x41e/0x660 [17501.851528] [<ffffffffc03241a4>] fc_rport_work+0xf4/0x6c0 [libfc] [17501.852490] [<ffffffff810a881a>] process_one_work+0x17a/0x440 [17501.853446] [<ffffffff810a94e6>] worker_thread+0x126/0x3c0 Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-04-25 21:08:45 +08:00
rdata = fcport->rdata;
if (rdata && !kref_get_unless_zero(&rdata->kref)) {
fcport->rdata = NULL;
rdata = NULL;
}
if (rdata && rdata->rp_state == RPORT_ST_READY) {
lport = fcport->qedf->lport;
port_id = rdata->ids.port_id;
QEDF_ERR(&(fcport->qedf->dbg_ctx),
"LOGO port_id=%x.\n", port_id);
fc_rport_logoff(rdata);
kref_put(&rdata->kref, fc_rport_destroy);
mutex_lock(&lport->disc.disc_mutex);
/* Recreate the rport and log back in */
rdata = fc_rport_create(lport, port_id);
if (rdata) {
mutex_unlock(&lport->disc.disc_mutex);
fc_rport_login(rdata);
fcport->rdata = rdata;
} else {
mutex_unlock(&lport->disc.disc_mutex);
fcport->rdata = NULL;
}
}
scsi: qedf: Synchronize rport restarts when multiple ELS commands time out If multiple ELS commands time out, such as aborts, they could all try to restart the same rport and the same time. This could mean multiple multiple processes trying to clean up any outstanding commands or trying to upload the same port. Add a new flag (QEDF_RPORT_IN_RESET) and check other fcport state flags before trying to reset the port. Fixes the crash: [17501.824701] ------------[ cut here ]------------ [17501.824733] kernel BUG at include/asm-generic/dma-mapping-common.h:65! [17501.824760] invalid opcode: 0000 [#1] SMP [17501.824781] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ses enclosure dm_service_time vfat fat sb_edac edac_core intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass joydev btrfs hpilo raid6_pq iTCO_wdt iTCO_vendor_support xor hpwdt ipmi_ssif sg crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ioatdma lpc_ich glue_helper ablk_helper i2c_i801 shpchp cryptd ipmi_si pcspkr acpi_power_meter ipmi_devintf pcc_cpufreq dca wmi ipmi_msghandler dm_multipath nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod [17501.825119] crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qedf(OE) drm libfcoe ahci qedi(OE) crct10dif_pclmul libfc libahci uio crct10dif_common crc32c_intel libiscsi libata scsi_transport_iscsi scsi_transport_fc tg3 qede(OE) scsi_tgt hpsa qed(OE) i2c_core ptp scsi_transport_sas pps_core iscsi_boot_sysfs dm_mirror dm_region_hash dm_log dm_mod [17501.825292] CPU: 8 PID: 10531 Comm: kworker/u96:1 Tainted: G OE ------------ 3.10.0-693.el7.x86_64 #1 [17501.825330] Hardware name: HP ProLiant DL380 Gen9/ProLiant 
DL380 Gen9, BIOS P89 06/02/2016 [17501.825372] Workqueue: fc_rport_eq fc_rport_work [libfc] [17501.825395] task: ffff88101bca8000 ti: ffff881025278000 task.ti: ffff881025278000 [17501.825424] RIP: 0010:[<ffffffffc042def9>] [<ffffffffc042def9>] qedf_unmap_sg_list.isra.15+0x89/0x90 [qedf] [17501.825471] RSP: 0018:ffff88102527bb98 EFLAGS: 00010212 [17501.825493] RAX: ffff8800224eac00 RBX: ffffc9000cd05210 RCX: 0000000000001000 [17501.825520] RDX: 000000007e655e40 RSI: 0000000000001000 RDI: ffff88107fe3b098 [17501.826683] RBP: ffff88102527bba0 R08: ffffffff81a13200 R09: 0000000000000286 [17501.827747] R10: 0000000000000004 R11: 0000000000000005 R12: ffffc9000cd051b8 [17501.828804] R13: ffff881037640c28 R14: 0000000000000007 R15: ffffc9000cd05200 [17501.829850] FS: 0000000000000000(0000) GS:ffff88103fa00000(0000) knlGS:0000000000000000 [17501.830910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17501.831966] CR2: 00007f9b94005f38 CR3: 00000000019f2000 CR4: 00000000003407e0 [17501.833027] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17501.834087] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17501.835142] Stack: [17501.836201] ffff881033ddbb80 ffff88102527bc30 ffffffffc042f834 0000000000002710 [17501.837264] ffff88102527bbd0 ffffffff8133d9dd ffffc9000cd052a0 ffff88102527bc30 [17501.838325] ffffffff816a9c65 0000000000000001 ffff88101bca8000 ffffffff810c4810 [17501.839388] Call Trace: [17501.840446] [<ffffffffc042f834>] qedf_scsi_done+0x54/0x1d0 [qedf] [17501.841504] [<ffffffff8133d9dd>] ? list_del+0xd/0x30 [17501.842537] [<ffffffff816a9c65>] ? wait_for_completion_timeout+0x125/0x140 [17501.843560] [<ffffffff810c4810>] ? 
wake_up_state+0x20/0x20 [17501.844577] [<ffffffffc0430311>] qedf_initiate_cleanup+0x2e1/0x310 [qedf] [17501.845587] [<ffffffffc04305fe>] qedf_flush_active_ios+0x10e/0x260 [qedf] [17501.846612] [<ffffffffc042892f>] qedf_cleanup_fcport+0x5f/0x370 [qedf] [17501.847613] [<ffffffffc04292d8>] qedf_rport_event_handler+0x398/0x950 [qedf] [17501.848602] [<ffffffff810cdc7c>] ? dequeue_entity+0x11c/0x5d0 [17501.849581] [<ffffffff81098a2b>] ? __internal_add_timer+0xab/0x130 [17501.850555] [<ffffffff810ce54e>] ? dequeue_task_fair+0x41e/0x660 [17501.851528] [<ffffffffc03241a4>] fc_rport_work+0xf4/0x6c0 [libfc] [17501.852490] [<ffffffff810a881a>] process_one_work+0x17a/0x440 [17501.853446] [<ffffffff810a94e6>] worker_thread+0x126/0x3c0 Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-04-25 21:08:45 +08:00
clear_bit(QEDF_RPORT_IN_RESET, &fcport->flags);
}
/*
 * Completion callback for an ELS request that was sent on behalf of libfc
 * (see qedf_send_adisc()).
 *
 * On a normal completion the response header and payload stored in the
 * request's middle path buffers are copied into a freshly allocated
 * fc_frame, which is then handed to qedf_process_l2_frame_compl() together
 * with the OX_ID recorded in @cb_arg.
 *
 * @cb_arg: callback context. It is freed on every exit path, matching the
 *          other ELS completion callbacks in this file.
 */
static void qedf_l2_els_compl(struct qedf_els_cb_arg *cb_arg)
{
	struct qedf_ioreq *els_req;
	struct qedf_rport *fcport;
	struct qedf_mp_req *mp_req;
	struct fc_frame *fp;
	struct fc_frame_header *fh, *mp_fc_hdr;
	void *resp_buf, *fc_payload;
	u32 resp_len;
	u16 l2_oxid;

	l2_oxid = cb_arg->l2_oxid;
	els_req = cb_arg->io_req;

	if (!els_req) {
		QEDF_ERR(NULL, "els_req is NULL.\n");
		goto free_arg;
	}

	/*
	 * If we are flushing the command just free the cb_arg as none of the
	 * response data will be valid.
	 */
	if (els_req->event == QEDF_IOREQ_EV_ELS_FLUSH) {
		QEDF_ERR(NULL, "els_req xid=0x%x event is flush.\n",
			 els_req->xid);
		goto free_arg;
	}

	fcport = els_req->fcport;
	mp_req = &(els_req->mp_req);
	mp_fc_hdr = &(mp_req->resp_fc_hdr);
	resp_len = mp_req->resp_len;
	resp_buf = mp_req->resp_buf;

	/*
	 * If a middle path ELS command times out, don't try to return
	 * the command but rather do any internal cleanup and then libfc
	 * timeout the command and clean up its internal resources.
	 */
	if (els_req->event == QEDF_IOREQ_EV_ELS_TMO) {
		/*
		 * If ADISC times out, libfc will timeout the exchange and then
		 * try to send a PLOGI which will timeout since the session is
		 * still offloaded. Force libfc to logout the session which
		 * will offload the connection and allow the PLOGI response to
		 * flow over the LL2 path.
		 */
		if (cb_arg->op == ELS_ADISC)
			qedf_restart_rport(fcport);
		/* cb_arg is owned by this callback; do not leak it on timeout */
		goto free_arg;
	}

	if (sizeof(struct fc_frame_header) + resp_len > QEDF_PAGE_SIZE) {
		QEDF_ERR(&(fcport->qedf->dbg_ctx), "resp_len is "
			 "beyond page size.\n");
		goto free_arg;
	}

	fp = fc_frame_alloc(fcport->qedf->lport, resp_len);
	if (!fp) {
		QEDF_ERR(&(fcport->qedf->dbg_ctx),
			 "fc_frame_alloc failure.\n");
		/* Nothing to hand to libfc, but cb_arg must still be freed */
		goto free_arg;
	}

	/* Copy frame header from firmware into fp */
	fh = (struct fc_frame_header *)fc_frame_header_get(fp);
	memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header));

	/* Copy payload from firmware into fp */
	fc_payload = fc_frame_payload_get(fp, resp_len);
	memcpy(fc_payload, resp_buf, resp_len);

	QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS,
		  "Completing OX_ID 0x%x back to libfc.\n", l2_oxid);
	qedf_process_l2_frame_compl(fcport, fp, l2_oxid);

free_arg:
	kfree(cb_arg);
}
/*
 * Send the ADISC carried in libfc frame @fp over the offloaded connection.
 *
 * The frame's OX_ID is recorded in a callback context so that
 * qedf_l2_els_compl() can complete the exchange later. The request timeout
 * is the local port's R_A_TOV.
 *
 * Returns 0 on success, -ENOMEM if the callback context cannot be
 * allocated, or the error returned by qedf_initiate_els().
 */
int qedf_send_adisc(struct qedf_rport *fcport, struct fc_frame *fp)
{
	struct qedf_ctx *qedf = fcport->qedf;
	uint32_t timeout = qedf->lport->r_a_tov;
	struct fc_frame_header *hdr = fc_frame_header_get(fp);
	struct qedf_els_cb_arg *ctx;
	struct fc_els_adisc *payload;
	int rc;

	ctx = kzalloc(sizeof(*ctx), GFP_NOIO);
	if (ctx) {
		ctx->l2_oxid = ntohs(hdr->fh_ox_id);

		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
			  "Sending ADISC ox_id=0x%x.\n", ctx->l2_oxid);

		payload = fc_frame_payload_get(fp, sizeof(*payload));

		rc = qedf_initiate_els(fcport, ELS_ADISC, payload,
				       sizeof(*payload), qedf_l2_els_compl,
				       ctx, timeout);
	} else {
		QEDF_ERR(&qedf->dbg_ctx,
			 "Unable to allocate cb_arg for ADISC\n");
		rc = -ENOMEM;
	}

	/* On any failure the context was never handed off, so drop it here */
	if (rc) {
		QEDF_ERR(&qedf->dbg_ctx, "ADISC failed.\n");
		kfree(ctx);
	}

	return rc;
}
/*
 * Completion callback for an SRR request issued by qedf_send_srr().
 *
 * Clears the SRR-in-progress flag on the original I/O, rebuilds the
 * response as an fc_frame and, if the target rejected the SRR, aborts the
 * original I/O. The reference taken on the original I/O for the SRR is
 * dropped and @cb_arg is freed before returning.
 */
static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg)
{
	struct qedf_ioreq *srr_req = cb_arg->io_req;
	struct qedf_ioreq *orig_io_req = cb_arg->aborted_io_req;
	struct qedf_ctx *qedf = srr_req->fcport->qedf;
	struct fc_lport *lport = qedf->lport;
	struct qedf_mp_req *mp;
	struct fc_frame *frame;
	struct fc_frame_header *dst_hdr;
	void *dst_payload;
	u32 len;
	int refs;
	u8 els_op;

	if (!orig_io_req) {
		QEDF_ERR(NULL, "orig_io_req is NULL.\n");
		goto out_free;
	}

	clear_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags);

	/* Only a real response (not timeout/error detect) stops the timer */
	if (!(srr_req->event == QEDF_IOREQ_EV_ELS_TMO ||
	      srr_req->event == QEDF_IOREQ_EV_ELS_ERR_DETECT))
		cancel_delayed_work_sync(&orig_io_req->timeout_work);

	refs = kref_read(&orig_io_req->refcount);
	QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
		  "Entered: orig_io=%p, orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n",
		  orig_io_req, orig_io_req->xid, srr_req->xid, refs);

	/* A timed out SRR carries no response; just release resources */
	if (srr_req->event == QEDF_IOREQ_EV_ELS_TMO) {
		QEDF_ERR(&qedf->dbg_ctx,
			 "ELS timeout rec_xid=0x%x.\n", srr_req->xid);
		goto out_put;
	}

	/* Rebuild the firmware response as a struct fc_frame */
	mp = &srr_req->mp_req;
	len = mp->resp_len;

	frame = fc_frame_alloc(lport, len);
	if (!frame) {
		QEDF_ERR(&qedf->dbg_ctx, "fc_frame_alloc failure.\n");
		goto out_put;
	}

	/* Header first ... */
	dst_hdr = (struct fc_frame_header *)fc_frame_header_get(frame);
	memcpy(dst_hdr, &mp->resp_fc_hdr, sizeof(struct fc_frame_header));

	/* ... then the payload */
	dst_payload = fc_frame_payload_get(frame, len);
	memcpy(dst_payload, mp->resp_buf, len);

	els_op = fc_frame_payload_op(frame);
	if (els_op == ELS_LS_ACC) {
		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, "SRR success.\n");
	} else if (els_op == ELS_LS_RJT) {
		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, "SRR rejected.\n");
		qedf_initiate_abts(orig_io_req, true);
	}

	fc_frame_free(frame);

out_put:
	/* The SRR is done, so drop its reference on the original command */
	kref_put(&orig_io_req->refcount, qedf_release_cmd);
out_free:
	kfree(cb_arg);
}
/*
 * Build and send an SRR ELS for @orig_io_req.
 *
 * @orig_io_req: I/O request the SRR refers to
 * @offset:      relative offset placed in the SRR payload
 * @r_ctl:       R_CTL value placed in the SRR payload
 *
 * A reference on @orig_io_req is taken for the lifetime of the SRR. If the
 * SRR cannot be queued, the original I/O is aborted and that reference is
 * dropped again. On success QEDF_CMD_SRR_SENT is set on the original I/O.
 *
 * Returns 0 on success, -EINVAL on a bad or no-longer-offloaded request,
 * -ENOMEM on allocation failure, or the error from qedf_initiate_els().
 */
static int qedf_send_srr(struct qedf_ioreq *orig_io_req, u32 offset, u8 r_ctl)
{
	struct qedf_els_cb_arg *ctx = NULL;
	struct qedf_rport *fcport;
	struct qedf_ctx *qedf;
	struct fcp_srr srr;
	u32 timeout;
	int rc;

	if (!orig_io_req) {
		QEDF_ERR(NULL, "orig_io_req is NULL.\n");
		return -EINVAL;
	}

	fcport = orig_io_req->fcport;

	/* The session must still be offloaded to send anything */
	if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
		QEDF_ERR(NULL, "fcport is no longer offloaded.\n");
		return -EINVAL;
	}

	if (!fcport->qedf) {
		QEDF_ERR(NULL, "fcport->qedf is NULL.\n");
		return -EINVAL;
	}

	/* Hold the original request until the SRR completes */
	kref_get(&orig_io_req->refcount);

	qedf = fcport->qedf;
	timeout = qedf->lport->r_a_tov;

	QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
		  "Sending SRR orig_io=%p, orig_xid=0x%x\n",
		  orig_io_req, orig_io_req->xid);
	memset(&srr, 0, sizeof(srr));

	ctx = kzalloc(sizeof(*ctx), GFP_NOIO);
	if (!ctx) {
		QEDF_ERR(&qedf->dbg_ctx,
			 "Unable to allocate cb_arg for SRR\n");
		rc = -ENOMEM;
	} else {
		ctx->aborted_io_req = orig_io_req;

		srr.srr_op = ELS_SRR;
		srr.srr_ox_id = htons(orig_io_req->xid);
		srr.srr_rx_id = htons(orig_io_req->rx_id);
		srr.srr_rel_off = htonl(offset);
		srr.srr_r_ctl = r_ctl;

		rc = qedf_initiate_els(fcport, ELS_SRR, &srr, sizeof(srr),
				       qedf_srr_compl, ctx, timeout);
	}

	if (!rc) {
		/* Let other threads know an SRR is outstanding */
		set_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags);
		return rc;
	}

	QEDF_ERR(&qedf->dbg_ctx,
		 "SRR failed - release orig_io_req=0x%x\n", orig_io_req->xid);
	kfree(ctx);
	/* The SRR could not be queued, so fall back to aborting the I/O */
	qedf_initiate_abts(orig_io_req, true);
	kref_put(&orig_io_req->refcount, qedf_release_cmd);

	return rc;
}
/*
 * Post a sequence cleanup (sequence recovery) task for @orig_io_req.
 *
 * @offset and @r_ctl are stashed in a callback context attached to the
 * request so that qedf_process_seq_cleanup_compl() can use them when the
 * cleanup completes. A reference on the request is taken and a cleanup
 * timer is armed before the task is posted to the send queue.
 */
static void qedf_initiate_seq_cleanup(struct qedf_ioreq *orig_io_req,
	u32 offset, u8 r_ctl)
{
	struct qedf_rport *fcport = orig_io_req->fcport;
	struct qedf_ctx *qedf = fcport->qedf;
	struct qedf_els_cb_arg *ctx;
	struct fcoe_wqe *wqe;
	unsigned long irq_flags;
	u16 idx;

	QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
		  "Doing sequence cleanup for xid=0x%x offset=%u.\n",
		  orig_io_req->xid, offset);

	ctx = kzalloc(sizeof(*ctx), GFP_NOIO);
	if (!ctx) {
		QEDF_ERR(&qedf->dbg_ctx,
			 "Unable to allocate cb_arg for sequence cleanup\n");
		return;
	}

	/* This reference belongs to the cleanup request */
	kref_get(&orig_io_req->refcount);

	orig_io_req->cmd_type = QEDF_SEQ_CLEANUP;
	ctx->offset = offset;
	ctx->r_ctl = r_ctl;
	orig_io_req->cb_arg = ctx;

	qedf_cmd_timer_set(qedf, orig_io_req, QEDF_CLEANUP_TIMEOUT * HZ);

	/* Claim a send queue entry and post the task under the rport lock */
	spin_lock_irqsave(&fcport->rport_lock, irq_flags);

	idx = qedf_get_sqe_idx(fcport);
	wqe = &fcport->sq[idx];
	memset(wqe, 0, sizeof(*wqe));
	orig_io_req->task_params->sqe = wqe;

	init_initiator_sequence_recovery_fcoe_task(orig_io_req->task_params,
						   offset);
	qedf_ring_doorbell(fcport);

	spin_unlock_irqrestore(&fcport->rport_lock, irq_flags);
}
/*
 * Handle completion of a sequence cleanup task started by
 * qedf_initiate_seq_cleanup().
 *
 * @qedf:   adapter context, used for logging
 * @cqe:    completion queue entry; when NULL the call is treated like a
 *          timeout and only resources are released
 * @io_req: request the cleanup was issued for; its cb_arg carries the
 *          offset and r_ctl to use for the follow-up SRR
 *
 * On a real completion the request timer is cancelled and an SRR is sent.
 * In all cases the callback context is freed and the reference taken for
 * the cleanup request is dropped.
 */
void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf,
	struct fcoe_cqe *cqe, struct qedf_ioreq *io_req)
{
	int rc;
	struct qedf_els_cb_arg *cb_arg;

	cb_arg = io_req->cb_arg;

	/* If we timed out just free resources */
	if (io_req->event == QEDF_IOREQ_EV_ELS_TMO || !cqe) {
		/* Terminate the message so it is not merged with the next one */
		QEDF_ERR(&qedf->dbg_ctx,
			 "cqe is NULL or timeout event (0x%x)\n",
			 io_req->event);
		goto free;
	}

	/* Kill the timer we put on the request */
	cancel_delayed_work_sync(&io_req->timeout_work);

	rc = qedf_send_srr(io_req, cb_arg->offset, cb_arg->r_ctl);
	if (rc)
		QEDF_ERR(&(qedf->dbg_ctx), "Unable to send SRR, I/O will "
			 "abort, xid=0x%x.\n", io_req->xid);
free:
	kfree(cb_arg);
	kref_put(&io_req->refcount, qedf_release_cmd);
}
static bool qedf_requeue_io_req(struct qedf_ioreq *orig_io_req)
{
struct qedf_rport *fcport;
struct qedf_ioreq *new_io_req;
unsigned long flags;
bool rc = false;
fcport = orig_io_req->fcport;
if (!fcport) {
QEDF_ERR(NULL, "fcport is NULL.\n");
goto out;
}
if (!orig_io_req->sc_cmd) {
QEDF_ERR(&(fcport->qedf->dbg_ctx), "sc_cmd is NULL for "
"xid=0x%x.\n", orig_io_req->xid);
goto out;
}
new_io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD);
if (!new_io_req) {
QEDF_ERR(&(fcport->qedf->dbg_ctx), "Could not allocate new "
"io_req.\n");
goto out;
}
new_io_req->sc_cmd = orig_io_req->sc_cmd;
/*
* This keeps the sc_cmd struct from being returned to the tape
* driver and being requeued twice. We do need to put a reference
* for the original I/O request since we will not do a SCSI completion
* for it.
*/
orig_io_req->sc_cmd = NULL;
kref_put(&orig_io_req->refcount, qedf_release_cmd);
spin_lock_irqsave(&fcport->rport_lock, flags);
/* kref for new command released in qedf_post_io_req on error */
if (qedf_post_io_req(fcport, new_io_req)) {
QEDF_ERR(&(fcport->qedf->dbg_ctx), "Unable to post io_req\n");
/* Return SQE to pool */
atomic_inc(&fcport->free_sqes);
} else {
QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS,
"Reissued SCSI command from orig_xid=0x%x on "
"new_xid=0x%x.\n", orig_io_req->xid, new_io_req->xid);
/*
* Abort the original I/O but do not return SCSI command as
* it has been reissued on another OX_ID.
*/
spin_unlock_irqrestore(&fcport->rport_lock, flags);
qedf_initiate_abts(orig_io_req, false);
goto out;
}
spin_unlock_irqrestore(&fcport->rport_lock, flags);
out:
return rc;
}
static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg)
{
struct qedf_ioreq *orig_io_req;
struct qedf_ioreq *rec_req;
struct qedf_mp_req *mp_req;
struct fc_frame_header *mp_fc_hdr, *fh;
struct fc_frame *fp;
void *resp_buf, *fc_payload;
u32 resp_len;
struct fc_lport *lport;
struct qedf_ctx *qedf;
int refcount;
enum fc_rctl r_ctl;
struct fc_els_ls_rjt *rjt;
struct fc_els_rec_acc *acc;
u8 opcode;
u32 offset, e_stat;
struct scsi_cmnd *sc_cmd;
bool srr_needed = false;
rec_req = cb_arg->io_req;
qedf = rec_req->fcport->qedf;
lport = qedf->lport;
orig_io_req = cb_arg->aborted_io_req;
if (!orig_io_req) {
QEDF_ERR(NULL, "orig_io_req is NULL.\n");
goto out_free;
}
if (rec_req->event != QEDF_IOREQ_EV_ELS_TMO &&
rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT)
cancel_delayed_work_sync(&orig_io_req->timeout_work);
refcount = kref_read(&orig_io_req->refcount);
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p,"
" orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n",
orig_io_req, orig_io_req->xid, rec_req->xid, refcount);
/* If a REC times out, free resources */
if (rec_req->event == QEDF_IOREQ_EV_ELS_TMO) {
QEDF_ERR(&qedf->dbg_ctx,
"Got TMO event, orig_io_req %p orig_io_xid=0x%x.\n",
orig_io_req, orig_io_req->xid);
goto out_put;
}
/* Normalize response data into struct fc_frame */
mp_req = &(rec_req->mp_req);
mp_fc_hdr = &(mp_req->resp_fc_hdr);
resp_len = mp_req->resp_len;
acc = resp_buf = mp_req->resp_buf;
fp = fc_frame_alloc(lport, resp_len);
if (!fp) {
QEDF_ERR(&(qedf->dbg_ctx),
"fc_frame_alloc failure.\n");
goto out_put;
}
/* Copy frame header from firmware into fp */
fh = (struct fc_frame_header *)fc_frame_header_get(fp);
memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header));
/* Copy payload from firmware into fp */
fc_payload = fc_frame_payload_get(fp, resp_len);
memcpy(fc_payload, resp_buf, resp_len);
opcode = fc_frame_payload_op(fp);
if (opcode == ELS_LS_RJT) {
rjt = fc_frame_payload_get(fp, sizeof(*rjt));
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"Received LS_RJT for REC: er_reason=0x%x, "
"er_explan=0x%x.\n", rjt->er_reason, rjt->er_explan);
/*
* The following response(s) mean that we need to reissue the
* request on another exchange. We need to do this without
* informing the upper layers lest it cause an application
* error.
*/
if ((rjt->er_reason == ELS_RJT_LOGIC ||
rjt->er_reason == ELS_RJT_UNAB) &&
rjt->er_explan == ELS_EXPL_OXID_RXID) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"Handle CMD LOST case.\n");
qedf_requeue_io_req(orig_io_req);
}
} else if (opcode == ELS_LS_ACC) {
offset = ntohl(acc->reca_fc4value);
e_stat = ntohl(acc->reca_e_stat);
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"Received LS_ACC for REC: offset=0x%x, e_stat=0x%x.\n",
offset, e_stat);
if (e_stat & ESB_ST_SEQ_INIT) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"Target has the seq init\n");
goto out_free_frame;
}
sc_cmd = orig_io_req->sc_cmd;
if (!sc_cmd) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"sc_cmd is NULL for xid=0x%x.\n",
orig_io_req->xid);
goto out_free_frame;
}
/* SCSI write case */
if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) {
if (offset == orig_io_req->data_xfer_len) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"WRITE - response lost.\n");
r_ctl = FC_RCTL_DD_CMD_STATUS;
srr_needed = true;
offset = 0;
} else {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"WRITE - XFER_RDY/DATA lost.\n");
r_ctl = FC_RCTL_DD_DATA_DESC;
/* Use data from warning CQE instead of REC */
offset = orig_io_req->tx_buf_off;
}
/* SCSI read case */
} else {
if (orig_io_req->rx_buf_off ==
orig_io_req->data_xfer_len) {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"READ - response lost.\n");
srr_needed = true;
r_ctl = FC_RCTL_DD_CMD_STATUS;
offset = 0;
} else {
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
"READ - DATA lost.\n");
/*
* For read case we always set the offset to 0
* for sequence recovery task.
*/
offset = 0;
r_ctl = FC_RCTL_DD_SOL_DATA;
}
}
if (srr_needed)
qedf_send_srr(orig_io_req, offset, r_ctl);
else
qedf_initiate_seq_cleanup(orig_io_req, offset, r_ctl);
}
out_free_frame:
fc_frame_free(fp);
out_put:
/* Put reference for original command since REC completed */
kref_put(&orig_io_req->refcount, qedf_release_cmd);
out_free:
kfree(cb_arg);
}
/*
 * Build and send a REC ELS for @orig_io_req.
 *
 * Assumes kref is already held by caller. An additional reference is taken
 * here for the lifetime of the REC and is dropped again if the REC cannot
 * be queued; otherwise qedf_rec_compl() drops it.
 *
 * Returns 0 on success, -EINVAL on a bad or no-longer-offloaded request,
 * -ENOMEM on allocation failure, or the error from qedf_initiate_els().
 */
int qedf_send_rec(struct qedf_ioreq *orig_io_req)
{
	struct fc_els_rec rec;
	struct qedf_rport *fcport;
	struct fc_lport *lport;
	struct qedf_els_cb_arg *cb_arg = NULL;
	struct qedf_ctx *qedf;
	uint32_t sid;
	uint32_t r_a_tov;
	int rc;

	if (!orig_io_req) {
		QEDF_ERR(NULL, "orig_io_req is NULL.\n");
		return -EINVAL;
	}

	fcport = orig_io_req->fcport;

	/* Check that fcport is still offloaded */
	if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
		QEDF_ERR(NULL, "fcport is no longer offloaded.\n");
		return -EINVAL;
	}

	if (!fcport->qedf) {
		QEDF_ERR(NULL, "fcport->qedf is NULL.\n");
		return -EINVAL;
	}

	/* Take reference until REC command completion */
	kref_get(&orig_io_req->refcount);

	qedf = fcport->qedf;
	lport = qedf->lport;
	sid = fcport->sid;
	r_a_tov = lport->r_a_tov;

	memset(&rec, 0, sizeof(rec));

	cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO);
	if (!cb_arg) {
		QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for "
			 "REC\n");
		rc = -ENOMEM;
		goto rec_err;
	}

	cb_arg->aborted_io_req = orig_io_req;

	rec.rec_cmd = ELS_REC;
	hton24(rec.rec_s_id, sid);
	rec.rec_ox_id = htons(orig_io_req->xid);
	rec.rec_rx_id =
	    htons(orig_io_req->task->tstorm_st_context.read_write.rx_id);

	/*
	 * rec.rec_rx_id is stored in wire (big-endian) order; convert it back
	 * for logging so it reads the same way as orig_xid.
	 */
	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending REC orig_io=%p, "
		  "orig_xid=0x%x rx_id=0x%x\n", orig_io_req,
		  orig_io_req->xid, ntohs(rec.rec_rx_id));
	rc = qedf_initiate_els(fcport, ELS_REC, &rec, sizeof(rec),
			       qedf_rec_compl, cb_arg, r_a_tov);

rec_err:
	if (rc) {
		QEDF_ERR(&(qedf->dbg_ctx), "REC failed - release orig_io_req"
			 "=0x%x\n", orig_io_req->xid);
		kfree(cb_arg);
		kref_put(&orig_io_req->refcount, qedf_release_cmd);
	}
	return rc;
}