2019-02-18 16:33:28 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2016 Avago Technologies. All rights reserved.
|
|
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/parser.h>
|
|
|
|
#include <uapi/scsi/fc/fc_fs.h>
|
|
|
|
#include <uapi/scsi/fc/fc_els.h>
|
2017-04-23 23:30:08 +08:00
|
|
|
#include <linux/delay.h>
|
2018-10-09 05:28:45 +08:00
|
|
|
#include <linux/overflow.h>
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
#include "nvme.h"
|
|
|
|
#include "fabrics.h"
|
|
|
|
#include <linux/nvme-fc-driver.h>
|
|
|
|
#include <linux/nvme-fc.h>
|
2019-04-10 22:16:19 +08:00
|
|
|
#include <scsi/scsi_transport_fc.h>
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/* *************************** Data Structures/Defines ****************** */
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Per-queue state flags.
 * NOTE(review): presumably bit positions within nvme_fc_queue.flags
 * (used with the bitops helpers) - confirm against the users.
 */
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED	= 0,
	NVME_FC_Q_LIVE,			/* implicitly 1 */
};
|
|
|
|
|
2017-10-26 07:43:15 +08:00
|
|
|
#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* default device-loss timeout, in seconds */
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvme_fc_queue {
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
struct device *dev;
|
|
|
|
struct blk_mq_hw_ctx *hctx;
|
|
|
|
void *lldd_handle;
|
|
|
|
size_t cmnd_capsule_len;
|
|
|
|
u32 qnum;
|
|
|
|
u32 rqcnt;
|
|
|
|
u32 seqno;
|
|
|
|
|
|
|
|
u64 connection_id;
|
|
|
|
atomic_t csn;
|
|
|
|
|
|
|
|
unsigned long flags;
|
|
|
|
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
|
|
|
|
|
2017-04-12 02:35:09 +08:00
|
|
|
/*
 * Bit mask values for an operation's flags word.
 * NOTE(review): presumably stored in nvme_fc_fcp_op.flags - confirm.
 */
enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_AEN		= (1 << 1),
};
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvmefc_ls_req_op {
|
|
|
|
struct nvmefc_ls_req ls_req;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
struct nvme_fc_rport *rport;
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvme_fc_queue *queue;
|
|
|
|
struct request *rq;
|
2017-04-12 02:35:09 +08:00
|
|
|
u32 flags;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
int ls_error;
|
|
|
|
struct completion ls_done;
|
2017-04-12 02:35:08 +08:00
|
|
|
struct list_head lsreq_list; /* rport->ls_req_list */
|
2016-12-02 16:28:42 +08:00
|
|
|
bool req_queued;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * State values for an FCP operation.
 * NOTE(review): presumably the values held in nvme_fc_fcp_op.state -
 * confirm against the users.
 */
enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};
|
|
|
|
|
|
|
|
struct nvme_fc_fcp_op {
|
|
|
|
struct nvme_request nreq; /*
|
|
|
|
* nvme/host/core.c
|
|
|
|
* requires this to be
|
|
|
|
* the 1st element in the
|
|
|
|
* private structure
|
|
|
|
* associated with the
|
|
|
|
* request.
|
|
|
|
*/
|
|
|
|
struct nvmefc_fcp_req fcp_req;
|
|
|
|
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
struct nvme_fc_queue *queue;
|
|
|
|
struct request *rq;
|
|
|
|
|
|
|
|
atomic_t state;
|
2017-04-23 23:30:07 +08:00
|
|
|
u32 flags;
|
2016-12-02 16:28:42 +08:00
|
|
|
u32 rqno;
|
|
|
|
u32 nents;
|
|
|
|
|
|
|
|
struct nvme_fc_cmd_iu cmd_iu;
|
|
|
|
struct nvme_fc_ersp_iu rsp_iu;
|
|
|
|
};
|
|
|
|
|
2018-10-09 05:28:45 +08:00
|
|
|
struct nvme_fcp_op_w_sgl {
|
|
|
|
struct nvme_fc_fcp_op op;
|
|
|
|
struct scatterlist sgl[SG_CHUNK_SIZE];
|
|
|
|
uint8_t priv[0];
|
|
|
|
};
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvme_fc_lport {
|
|
|
|
struct nvme_fc_local_port localport;
|
|
|
|
|
|
|
|
struct ida endp_cnt;
|
|
|
|
struct list_head port_list; /* nvme_fc_port_list */
|
|
|
|
struct list_head endp_list;
|
|
|
|
struct device *dev; /* physical device for dma */
|
|
|
|
struct nvme_fc_port_template *ops;
|
|
|
|
struct kref ref;
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
atomic_t act_rport_cnt;
|
2016-12-02 16:28:42 +08:00
|
|
|
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
|
|
|
|
|
|
|
|
struct nvme_fc_rport {
|
|
|
|
struct nvme_fc_remote_port remoteport;
|
|
|
|
|
|
|
|
struct list_head endp_list; /* for lport->endp_list */
|
|
|
|
struct list_head ctrl_list;
|
2017-04-12 02:35:08 +08:00
|
|
|
struct list_head ls_req_list;
|
2018-09-14 07:17:38 +08:00
|
|
|
struct list_head disc_list;
|
2017-04-12 02:35:08 +08:00
|
|
|
struct device *dev; /* physical device for dma */
|
|
|
|
struct nvme_fc_lport *lport;
|
2016-12-02 16:28:42 +08:00
|
|
|
spinlock_t lock;
|
|
|
|
struct kref ref;
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
atomic_t act_ctrl_cnt;
|
2017-10-26 07:43:17 +08:00
|
|
|
unsigned long dev_loss_end;
|
2016-12-02 16:28:42 +08:00
|
|
|
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
 * Bit mask values for controller flags.
 * NOTE(review): presumably stored in nvme_fc_ctrl.flags - confirm.
 */
enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};
|
|
|
|
|
|
|
|
struct nvme_fc_ctrl {
|
|
|
|
spinlock_t lock;
|
|
|
|
struct nvme_fc_queue *queues;
|
|
|
|
struct device *dev;
|
|
|
|
struct nvme_fc_lport *lport;
|
|
|
|
struct nvme_fc_rport *rport;
|
|
|
|
u32 cnum;
|
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used in
for blk-mq setup, etc. It also means that everything about the
controller is fully know before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transtion of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
bool ioq_live;
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
bool assoc_active;
|
2018-11-15 08:35:10 +08:00
|
|
|
atomic_t err_work_active;
|
2016-12-02 16:28:42 +08:00
|
|
|
u64 association_id;
|
|
|
|
|
|
|
|
struct list_head ctrl_list; /* rport->ctrl_list */
|
|
|
|
|
|
|
|
struct blk_mq_tag_set admin_tag_set;
|
|
|
|
struct blk_mq_tag_set tag_set;
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
struct delayed_work connect_work;
|
2018-11-15 08:35:10 +08:00
|
|
|
struct work_struct err_work;
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
struct kref ref;
|
2017-04-23 23:30:08 +08:00
|
|
|
u32 flags;
|
|
|
|
u32 iocnt;
|
2017-05-23 06:28:42 +08:00
|
|
|
wait_queue_head_t ioabort_wait;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-11-08 06:13:10 +08:00
|
|
|
struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS];
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
struct nvme_ctrl ctrl;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct nvme_fc_ctrl *
|
|
|
|
to_fc_ctrl(struct nvme_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nvme_fc_lport *
|
|
|
|
localport_to_lport(struct nvme_fc_local_port *portptr)
|
|
|
|
{
|
|
|
|
return container_of(portptr, struct nvme_fc_lport, localport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nvme_fc_rport *
|
|
|
|
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
|
|
|
|
{
|
|
|
|
return container_of(portptr, struct nvme_fc_rport, remoteport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nvmefc_ls_req_op *
|
|
|
|
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
|
|
|
|
{
|
|
|
|
return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nvme_fc_fcp_op *
|
|
|
|
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
|
|
|
|
{
|
|
|
|
return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* *************************** Globals **************************** */
|
|
|
|
|
|
|
|
|
|
|
|
static DEFINE_SPINLOCK(nvme_fc_lock);
|
|
|
|
|
|
|
|
static LIST_HEAD(nvme_fc_lport_list);
|
|
|
|
static DEFINE_IDA(nvme_fc_local_port_cnt);
|
|
|
|
static DEFINE_IDA(nvme_fc_ctrl_cnt);
|
|
|
|
|
2019-05-03 17:43:52 +08:00
|
|
|
static struct workqueue_struct *nvme_fc_wq;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2019-06-29 08:26:08 +08:00
|
|
|
static bool nvme_fc_waiting_to_unload;
|
|
|
|
static DECLARE_COMPLETION(nvme_fc_unload_proceed);
|
|
|
|
|
2017-09-15 01:38:41 +08:00
|
|
|
/*
|
|
|
|
* These items are short-term. They will eventually be moved into
|
|
|
|
* a generic FC class. See comments in module init.
|
|
|
|
*/
|
|
|
|
static struct device *fc_udev_device;
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/* *********************** FC-NVME Port Management ************************ */
|
|
|
|
|
|
|
|
/* forward declaration; the definition is not part of this section */
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
			struct nvme_fc_queue *queue, unsigned int qidx);
|
|
|
|
|
2017-08-01 04:20:30 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_free_lport(struct kref *ref)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport =
|
|
|
|
container_of(ref, struct nvme_fc_lport, ref);
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
|
|
|
|
WARN_ON(!list_empty(&lport->endp_list));
|
|
|
|
|
|
|
|
/* remove from transport list */
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
list_del(&lport->port_list);
|
2019-06-29 08:26:08 +08:00
|
|
|
if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
|
|
|
|
complete(&nvme_fc_unload_proceed);
|
2017-08-01 04:20:30 +08:00
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
|
|
|
|
ida_destroy(&lport->endp_cnt);
|
|
|
|
|
|
|
|
put_device(lport->dev);
|
|
|
|
|
|
|
|
kfree(lport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_lport_put(struct nvme_fc_lport *lport)
|
|
|
|
{
|
|
|
|
kref_put(&lport->ref, nvme_fc_free_lport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_lport_get(struct nvme_fc_lport *lport)
|
|
|
|
{
|
|
|
|
return kref_get_unless_zero(&lport->ref);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static struct nvme_fc_lport *
|
2017-11-03 23:13:16 +08:00
|
|
|
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
|
|
|
|
struct nvme_fc_port_template *ops,
|
|
|
|
struct device *dev)
|
2017-08-01 04:20:30 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
|
|
|
|
if (lport->localport.node_name != pinfo->node_name ||
|
|
|
|
lport->localport.port_name != pinfo->port_name)
|
|
|
|
continue;
|
|
|
|
|
2017-11-03 23:13:16 +08:00
|
|
|
if (lport->dev != dev) {
|
|
|
|
lport = ERR_PTR(-EXDEV);
|
|
|
|
goto out_done;
|
|
|
|
}
|
|
|
|
|
2017-08-01 04:20:30 +08:00
|
|
|
if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
|
|
|
|
lport = ERR_PTR(-EEXIST);
|
|
|
|
goto out_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nvme_fc_lport_get(lport)) {
|
|
|
|
/*
|
|
|
|
* fails if ref cnt already 0. If so,
|
|
|
|
* act as if lport already deleted
|
|
|
|
*/
|
|
|
|
lport = NULL;
|
|
|
|
goto out_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* resume the lport */
|
|
|
|
|
2017-11-03 23:13:16 +08:00
|
|
|
lport->ops = ops;
|
2017-08-01 04:20:30 +08:00
|
|
|
lport->localport.port_role = pinfo->port_role;
|
|
|
|
lport->localport.port_id = pinfo->port_id;
|
|
|
|
lport->localport.port_state = FC_OBJSTATE_ONLINE;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
return lport;
|
|
|
|
}
|
|
|
|
|
|
|
|
lport = NULL;
|
|
|
|
|
|
|
|
out_done:
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
return lport;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* nvme_fc_register_localport - transport entry point called by an
|
|
|
|
* LLDD to register the existence of a NVME
|
|
|
|
* host FC port.
|
|
|
|
* @pinfo: pointer to information about the port to be registered
|
|
|
|
* @template: LLDD entrypoints and operational parameters for the port
|
|
|
|
* @dev: physical hardware device node port corresponds to. Will be
|
|
|
|
* used for DMA mappings
|
2018-10-09 05:28:44 +08:00
|
|
|
* @portptr: pointer to a local port pointer. Upon success, the routine
|
2016-12-02 16:28:42 +08:00
|
|
|
* will allocate a nvme_fc_local_port structure and place its
|
|
|
|
* address in the local port pointer. Upon failure, local port
|
|
|
|
* pointer will be set to 0.
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* a completion status. Must be 0 upon success; a negative errno
|
|
|
|
* (ex: -ENXIO) upon failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
|
|
|
|
struct nvme_fc_port_template *template,
|
|
|
|
struct device *dev,
|
|
|
|
struct nvme_fc_local_port **portptr)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *newrec;
|
|
|
|
unsigned long flags;
|
|
|
|
int ret, idx;
|
|
|
|
|
|
|
|
if (!template->localport_delete || !template->remoteport_delete ||
|
|
|
|
!template->ls_req || !template->fcp_io ||
|
|
|
|
!template->ls_abort || !template->fcp_abort ||
|
|
|
|
!template->max_hw_queues || !template->max_sgl_segments ||
|
|
|
|
!template->max_dif_sgl_segments || !template->dma_boundary) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out_reghost_failed;
|
|
|
|
}
|
|
|
|
|
2017-08-01 04:20:30 +08:00
|
|
|
/*
|
|
|
|
* look to see if there is already a localport that had been
|
|
|
|
* deregistered and in the process of waiting for all the
|
|
|
|
* references to fully be removed. If the references haven't
|
|
|
|
* expired, we can simply re-enable the localport. Remoteports
|
|
|
|
* and controller reconnections should resume naturally.
|
|
|
|
*/
|
2017-11-03 23:13:16 +08:00
|
|
|
newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);
|
2017-08-01 04:20:30 +08:00
|
|
|
|
|
|
|
/* found an lport, but something about its state is bad */
|
|
|
|
if (IS_ERR(newrec)) {
|
|
|
|
ret = PTR_ERR(newrec);
|
|
|
|
goto out_reghost_failed;
|
|
|
|
|
|
|
|
/* found existing lport, which was resumed */
|
|
|
|
} else if (newrec) {
|
|
|
|
*portptr = &newrec->localport;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* nothing found - allocate a new localport struct */
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!newrec) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out_reghost_failed;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
|
|
|
|
if (idx < 0) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto out_fail_kfree;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!get_device(dev) && dev) {
|
|
|
|
ret = -ENODEV;
|
|
|
|
goto out_ida_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&newrec->port_list);
|
|
|
|
INIT_LIST_HEAD(&newrec->endp_list);
|
|
|
|
kref_init(&newrec->ref);
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
atomic_set(&newrec->act_rport_cnt, 0);
|
2016-12-02 16:28:42 +08:00
|
|
|
newrec->ops = template;
|
|
|
|
newrec->dev = dev;
|
|
|
|
ida_init(&newrec->endp_cnt);
|
|
|
|
newrec->localport.private = &newrec[1];
|
|
|
|
newrec->localport.node_name = pinfo->node_name;
|
|
|
|
newrec->localport.port_name = pinfo->port_name;
|
|
|
|
newrec->localport.port_role = pinfo->port_role;
|
|
|
|
newrec->localport.port_id = pinfo->port_id;
|
|
|
|
newrec->localport.port_state = FC_OBJSTATE_ONLINE;
|
|
|
|
newrec->localport.port_num = idx;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
if (dev)
|
|
|
|
dma_set_seg_boundary(dev, template->dma_boundary);
|
|
|
|
|
|
|
|
*portptr = &newrec->localport;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_ida_put:
|
|
|
|
ida_simple_remove(&nvme_fc_local_port_cnt, idx);
|
|
|
|
out_fail_kfree:
|
|
|
|
kfree(newrec);
|
|
|
|
out_reghost_failed:
|
|
|
|
*portptr = NULL;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nvme_fc_unregister_localport - transport entry point called by an
|
|
|
|
* LLDD to deregister/remove a previously
|
|
|
|
* registered a NVME host FC port.
|
2018-10-09 05:28:44 +08:00
|
|
|
* @portptr: pointer to the (registered) local port that is to be deregistered.
|
2016-12-02 16:28:42 +08:00
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* a completion status. Must be 0 upon success; a negative errno
|
|
|
|
* (ex: -ENXIO) upon failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport = localport_to_lport(portptr);
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (!portptr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
if (portptr->port_state != FC_OBJSTATE_ONLINE) {
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
portptr->port_state = FC_OBJSTATE_DELETED;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
if (atomic_read(&lport->act_rport_cnt) == 0)
|
|
|
|
lport->ops->localport_delete(&lport->localport);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_lport_put(lport);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
|
|
|
|
|
2017-09-15 01:38:42 +08:00
|
|
|
/*
 * Buffer size for one udev environment string carrying a TRADDR.
 *
 * TRADDR strings, per FC-NVME, have a fixed format:
 *   "nn-0x<16hexdigits>:pn-0x<16hexdigits>"	- 43 characters
 * The udev event only differs by the prefix naming the field:
 *   "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR="	- 19 characters max
 * 19 + 43 + null_fudge = 64 characters
 */
#define FCNVME_TRADDR_LENGTH		64
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
|
|
|
|
struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/
|
|
|
|
char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/
|
|
|
|
char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
|
|
|
|
|
|
|
|
if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
|
|
|
|
return;
|
|
|
|
|
|
|
|
snprintf(hostaddr, sizeof(hostaddr),
|
|
|
|
"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
|
|
|
|
lport->localport.node_name, lport->localport.port_name);
|
|
|
|
snprintf(tgtaddr, sizeof(tgtaddr),
|
|
|
|
"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
|
|
|
|
rport->remoteport.node_name, rport->remoteport.port_name);
|
|
|
|
kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
|
|
|
|
}
|
|
|
|
|
2017-09-27 12:50:45 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_free_rport(struct kref *ref)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport =
|
|
|
|
container_of(ref, struct nvme_fc_rport, ref);
|
|
|
|
struct nvme_fc_lport *lport =
|
|
|
|
localport_to_lport(rport->remoteport.localport);
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
|
|
|
|
WARN_ON(!list_empty(&rport->ctrl_list));
|
|
|
|
|
|
|
|
/* remove from lport list */
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
list_del(&rport->endp_list);
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
2018-09-14 07:17:38 +08:00
|
|
|
WARN_ON(!list_empty(&rport->disc_list));
|
2017-09-27 12:50:45 +08:00
|
|
|
ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
|
|
|
|
|
|
|
|
kfree(rport);
|
|
|
|
|
|
|
|
nvme_fc_lport_put(lport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_rport_put(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
kref_put(&rport->ref, nvme_fc_free_rport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_rport_get(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
return kref_get_unless_zero(&rport->ref);
|
|
|
|
}
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
switch (ctrl->ctrl.state) {
|
|
|
|
case NVME_CTRL_NEW:
|
2018-02-01 00:31:24 +08:00
|
|
|
case NVME_CTRL_CONNECTING:
|
2017-10-26 07:43:17 +08:00
|
|
|
/*
|
|
|
|
* As all reconnects were suppressed, schedule a
|
|
|
|
* connect.
|
|
|
|
*/
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: connectivity re-established. "
|
|
|
|
"Attempting reconnect\n", ctrl->cnum);
|
|
|
|
|
|
|
|
queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NVME_CTRL_RESETTING:
|
|
|
|
/*
|
|
|
|
* Controller is already in the process of terminating the
|
|
|
|
* association. No need to do anything further. The reconnect
|
|
|
|
* step will naturally occur after the reset completes.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* no action to take - let it delete */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct nvme_fc_rport *
|
|
|
|
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
|
|
|
|
struct nvme_fc_port_info *pinfo)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport;
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
list_for_each_entry(rport, &lport->endp_list, endp_list) {
|
|
|
|
if (rport->remoteport.node_name != pinfo->node_name ||
|
|
|
|
rport->remoteport.port_name != pinfo->port_name)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!nvme_fc_rport_get(rport)) {
|
|
|
|
rport = ERR_PTR(-ENOLCK);
|
|
|
|
goto out_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
|
|
|
|
/* has it been unregistered */
|
|
|
|
if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
|
|
|
|
/* means lldd called us twice */
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
nvme_fc_rport_put(rport);
|
|
|
|
return ERR_PTR(-ESTALE);
|
|
|
|
}
|
|
|
|
|
2018-03-06 12:55:49 +08:00
|
|
|
rport->remoteport.port_role = pinfo->port_role;
|
|
|
|
rport->remoteport.port_id = pinfo->port_id;
|
2017-10-26 07:43:17 +08:00
|
|
|
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
|
|
|
|
rport->dev_loss_end = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* kick off a reconnect attempt on all associations to the
|
|
|
|
* remote port. A successful reconnects will resume i/o.
|
|
|
|
*/
|
|
|
|
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
|
|
|
|
nvme_fc_resume_controller(ctrl);
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
|
|
|
return rport;
|
|
|
|
}
|
|
|
|
|
|
|
|
rport = NULL;
|
|
|
|
|
|
|
|
out_done:
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
|
|
|
return rport;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
|
|
|
|
struct nvme_fc_port_info *pinfo)
|
|
|
|
{
|
|
|
|
if (pinfo->dev_loss_tmo)
|
|
|
|
rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
|
|
|
|
else
|
|
|
|
rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/**
|
|
|
|
* nvme_fc_register_remoteport - transport entry point called by an
|
|
|
|
* LLDD to register the existence of a NVME
|
|
|
|
* subsystem FC port on its fabric.
|
|
|
|
* @localport: pointer to the (registered) local port that the remote
|
|
|
|
* subsystem port is connected to.
|
|
|
|
* @pinfo: pointer to information about the port to be registered
|
2018-10-09 05:28:44 +08:00
|
|
|
* @portptr: pointer to a remote port pointer. Upon success, the routine
|
2016-12-02 16:28:42 +08:00
|
|
|
* will allocate a nvme_fc_remote_port structure and place its
|
|
|
|
* address in the remote port pointer. Upon failure, remote port
|
|
|
|
* pointer will be set to 0.
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* a completion status. Must be 0 upon success; a negative errno
|
|
|
|
* (ex: -ENXIO) upon failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
|
|
|
|
struct nvme_fc_port_info *pinfo,
|
|
|
|
struct nvme_fc_remote_port **portptr)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport = localport_to_lport(localport);
|
|
|
|
struct nvme_fc_rport *newrec;
|
|
|
|
unsigned long flags;
|
|
|
|
int ret, idx;
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
if (!nvme_fc_lport_get(lport)) {
|
|
|
|
ret = -ESHUTDOWN;
|
|
|
|
goto out_reghost_failed;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* look to see if there is already a remoteport that is waiting
|
|
|
|
* for a reconnect (within dev_loss_tmo) with the same WWN's.
|
|
|
|
* If so, transition to it and reconnect.
|
|
|
|
*/
|
|
|
|
newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
|
|
|
|
|
|
|
|
/* found an rport, but something about its state is bad */
|
|
|
|
if (IS_ERR(newrec)) {
|
|
|
|
ret = PTR_ERR(newrec);
|
|
|
|
goto out_lport_put;
|
|
|
|
|
|
|
|
/* found existing rport, which was resumed */
|
|
|
|
} else if (newrec) {
|
|
|
|
nvme_fc_lport_put(lport);
|
|
|
|
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
|
|
|
|
nvme_fc_signal_discovery_scan(lport, newrec);
|
|
|
|
*portptr = &newrec->remoteport;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* nothing found - allocate a new remoteport struct */
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!newrec) {
|
|
|
|
ret = -ENOMEM;
|
2017-10-26 07:43:17 +08:00
|
|
|
goto out_lport_put;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
|
|
|
|
if (idx < 0) {
|
|
|
|
ret = -ENOSPC;
|
2017-10-26 07:43:17 +08:00
|
|
|
goto out_kfree_rport;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&newrec->endp_list);
|
|
|
|
INIT_LIST_HEAD(&newrec->ctrl_list);
|
2017-04-12 02:35:08 +08:00
|
|
|
INIT_LIST_HEAD(&newrec->ls_req_list);
|
2018-09-14 07:17:38 +08:00
|
|
|
INIT_LIST_HEAD(&newrec->disc_list);
|
2016-12-02 16:28:42 +08:00
|
|
|
kref_init(&newrec->ref);
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
atomic_set(&newrec->act_ctrl_cnt, 0);
|
2016-12-02 16:28:42 +08:00
|
|
|
spin_lock_init(&newrec->lock);
|
|
|
|
newrec->remoteport.localport = &lport->localport;
|
2017-04-12 02:35:08 +08:00
|
|
|
newrec->dev = lport->dev;
|
|
|
|
newrec->lport = lport;
|
2016-12-02 16:28:42 +08:00
|
|
|
newrec->remoteport.private = &newrec[1];
|
|
|
|
newrec->remoteport.port_role = pinfo->port_role;
|
|
|
|
newrec->remoteport.node_name = pinfo->node_name;
|
|
|
|
newrec->remoteport.port_name = pinfo->port_name;
|
|
|
|
newrec->remoteport.port_id = pinfo->port_id;
|
|
|
|
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
|
|
|
|
newrec->remoteport.port_num = idx;
|
2017-10-26 07:43:17 +08:00
|
|
|
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
list_add_tail(&newrec->endp_list, &lport->endp_list);
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
2017-09-15 01:38:42 +08:00
|
|
|
nvme_fc_signal_discovery_scan(lport, newrec);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
*portptr = &newrec->remoteport;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_kfree_rport:
|
|
|
|
kfree(newrec);
|
2017-10-26 07:43:17 +08:00
|
|
|
out_lport_put:
|
|
|
|
nvme_fc_lport_put(lport);
|
2016-12-02 16:28:42 +08:00
|
|
|
out_reghost_failed:
|
|
|
|
*portptr = NULL;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
|
|
|
|
|
2017-04-12 02:35:09 +08:00
|
|
|
static int
|
|
|
|
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
struct nvmefc_ls_req_op *lsop;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
restart:
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
|
|
|
|
list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
|
|
|
|
if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
|
|
|
|
lsop->flags |= FCOP_FLAGS_TERMIO;
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
rport->lport->ops->ls_abort(&rport->lport->localport,
|
|
|
|
&rport->remoteport,
|
|
|
|
&lsop->ls_req);
|
|
|
|
goto restart;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: controller connectivity lost. Awaiting "
|
|
|
|
"Reconnect", ctrl->cnum);
|
|
|
|
|
|
|
|
switch (ctrl->ctrl.state) {
|
|
|
|
case NVME_CTRL_NEW:
|
|
|
|
case NVME_CTRL_LIVE:
|
|
|
|
/*
|
|
|
|
* Schedule a controller reset. The reset will terminate the
|
|
|
|
* association and schedule the reconnect timer. Reconnects
|
|
|
|
* will be attempted until either the ctlr_loss_tmo
|
|
|
|
* (max_retries * connect_delay) expires or the remoteport's
|
|
|
|
* dev_loss_tmo expires.
|
|
|
|
*/
|
|
|
|
if (nvme_reset_ctrl(&ctrl->ctrl)) {
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
2018-03-11 23:46:06 +08:00
|
|
|
"NVME-FC{%d}: Couldn't schedule reset.\n",
|
2017-10-26 07:43:17 +08:00
|
|
|
ctrl->cnum);
|
|
|
|
nvme_delete_ctrl(&ctrl->ctrl);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2018-02-01 00:31:24 +08:00
|
|
|
case NVME_CTRL_CONNECTING:
|
2017-10-26 07:43:17 +08:00
|
|
|
/*
|
|
|
|
* The association has already been terminated and the
|
|
|
|
* controller is attempting reconnects. No need to do anything
|
|
|
|
* futher. Reconnects will be attempted until either the
|
|
|
|
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
|
|
|
|
* remoteport's dev_loss_tmo expires.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NVME_CTRL_RESETTING:
|
|
|
|
/*
|
|
|
|
* Controller is already in the process of terminating the
|
|
|
|
* association. No need to do anything further. The reconnect
|
|
|
|
* step will kick in naturally after the association is
|
|
|
|
* terminated.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NVME_CTRL_DELETING:
|
|
|
|
default:
|
|
|
|
/* no action to take - let it delete */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/**
|
|
|
|
* nvme_fc_unregister_remoteport - transport entry point called by an
|
|
|
|
* LLDD to deregister/remove a previously
|
|
|
|
* registered a NVME subsystem FC port.
|
2018-10-09 05:28:44 +08:00
|
|
|
* @portptr: pointer to the (registered) remote port that is to be
|
|
|
|
* deregistered.
|
2016-12-02 16:28:42 +08:00
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* a completion status. Must be 0 upon success; a negative errno
|
|
|
|
* (ex: -ENXIO) upon failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (!portptr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
|
|
|
|
if (portptr->port_state != FC_OBJSTATE_ONLINE) {
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
portptr->port_state = FC_OBJSTATE_DELETED;
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
|
|
|
|
|
|
|
|
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
|
|
|
|
/* if dev_loss_tmo==0, dev loss is immediate */
|
|
|
|
if (!portptr->dev_loss_tmo) {
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
2018-03-11 23:46:06 +08:00
|
|
|
"NVME-FC{%d}: controller connectivity lost.\n",
|
2017-10-26 07:43:17 +08:00
|
|
|
ctrl->cnum);
|
|
|
|
nvme_delete_ctrl(&ctrl->ctrl);
|
|
|
|
} else
|
|
|
|
nvme_fc_ctrl_connectivity_loss(ctrl);
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
2017-04-12 02:35:09 +08:00
|
|
|
nvme_fc_abort_lsops(rport);
|
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
if (atomic_read(&rport->act_ctrl_cnt) == 0)
|
|
|
|
rport->lport->ops->remoteport_delete(portptr);
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
/*
|
|
|
|
* release the reference, which will allow, if all controllers
|
|
|
|
* go away, which should only occur after dev_loss_tmo occurs,
|
|
|
|
* for the rport to be torn down.
|
|
|
|
*/
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_rport_put(rport);
|
2017-10-26 07:43:17 +08:00
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
|
|
|
|
|
2017-09-15 01:38:42 +08:00
|
|
|
/**
|
|
|
|
* nvme_fc_rescan_remoteport - transport entry point called by an
|
|
|
|
* LLDD to request a nvme device rescan.
|
|
|
|
* @remoteport: pointer to the (registered) remote port that is to be
|
|
|
|
* rescanned.
|
|
|
|
*
|
|
|
|
* Returns: N/A
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
|
|
|
|
|
|
|
|
nvme_fc_signal_discovery_scan(rport->lport, rport);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
|
|
|
|
|
2017-10-26 07:43:15 +08:00
|
|
|
int
|
|
|
|
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
|
|
|
|
u32 dev_loss_tmo)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
|
|
|
|
if (portptr->port_state != FC_OBJSTATE_ONLINE) {
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
|
|
|
|
rport->remoteport.dev_loss_tmo = dev_loss_tmo;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/* *********************** FC-NVME DMA Handling **************************** */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The fcloop device passes in a NULL device pointer. Real LLD's will
|
|
|
|
* pass in a valid device pointer. If NULL is passed to the dma mapping
|
|
|
|
* routines, depending on the platform, it may or may not succeed, and
|
|
|
|
* may crash.
|
|
|
|
*
|
|
|
|
* As such:
|
|
|
|
* Wrap all the dma routines and check the dev pointer.
|
|
|
|
*
|
|
|
|
* For simple mappings (those that return just a dma address), we'll noop them,
|
|
|
|
* returning a dma address of 0.
|
|
|
|
*
|
|
|
|
* On more complex mappings (dma_map_sg), a pseudo routine fills
|
|
|
|
* in the scatter list, setting all dma addresses to 0.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static inline dma_addr_t
|
|
|
|
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
|
|
|
|
{
|
|
|
|
return dev ? dma_mapping_error(dev, dma_addr) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
if (dev)
|
|
|
|
dma_unmap_single(dev, addr, size, dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
if (dev)
|
|
|
|
dma_sync_single_for_cpu(dev, addr, size, dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
if (dev)
|
|
|
|
dma_sync_single_for_device(dev, addr, size, dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pseudo dma_map_sg call */
|
|
|
|
static int
|
|
|
|
fc_map_sg(struct scatterlist *sg, int nents)
|
|
|
|
{
|
|
|
|
struct scatterlist *s;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
WARN_ON(nents == 0 || sg[0].length == 0);
|
|
|
|
|
|
|
|
for_each_sg(sg, s, nents, i) {
|
|
|
|
s->dma_address = 0L;
|
|
|
|
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
|
|
|
s->dma_length = s->length;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
return nents;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
|
|
|
|
enum dma_data_direction dir)
|
|
|
|
{
|
|
|
|
if (dev)
|
|
|
|
dma_unmap_sg(dev, sg, nents, dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* *********************** FC-NVME LS Handling **************************** */
|
|
|
|
|
|
|
|
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
|
|
|
|
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2017-04-12 02:35:08 +08:00
|
|
|
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
2017-04-12 02:35:08 +08:00
|
|
|
struct nvme_fc_rport *rport = lsop->rport;
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvmefc_ls_req *lsreq = &lsop->ls_req;
|
|
|
|
unsigned long flags;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
if (!lsop->req_queued) {
|
2017-04-12 02:35:08 +08:00
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_del(&lsop->lsreq_list);
|
|
|
|
|
|
|
|
lsop->req_queued = false;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
|
2016-12-02 16:28:42 +08:00
|
|
|
(lsreq->rqstlen + lsreq->rsplen),
|
|
|
|
DMA_BIDIRECTIONAL);
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
nvme_fc_rport_put(rport);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Map an LS request, queue it on the remoteport's LS list and hand it to
 * the LLDD, with @done installed as the request's completion callback.
 *
 * A remoteport reference is taken for the request; it is dropped again
 * here if the send fails.
 *
 * Returns 0 if the LLDD accepted the request, otherwise:
 *   -ECONNREFUSED - remoteport is not ONLINE
 *   -ESHUTDOWN    - no reference could be taken on the remoteport
 *   -EFAULT       - the request/response buffer could not be dma mapped
 *   or the error returned by the LLDD's ls_req() (also stored in
 *   lsop->ls_error).
 */
static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	/* one mapping covers the request followed by the response */
	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		nvme_fc_rport_put(rport);
		return -EFAULT;
	}
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);
	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);
	lsop->req_queued = true;
	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (!ret)
		return 0;

	/* LLDD rejected the request: unlink, unmap and release */
	lsop->ls_error = ret;

	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);

	nvme_fc_rport_put(rport);

	return ret;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
|
|
|
|
{
|
|
|
|
struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
|
|
|
|
|
|
|
|
lsop->ls_error = status;
|
|
|
|
complete(&lsop->ls_done);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2017-04-12 02:35:08 +08:00
|
|
|
nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvmefc_ls_req *lsreq = &lsop->ls_req;
|
|
|
|
struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
|
|
|
|
int ret;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
if (!ret) {
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
|
|
|
|
* No timeout/not interruptible as we need the struct
|
|
|
|
* to exist until the lldd calls us back. Thus mandate
|
|
|
|
* wait until driver calls back. lldd responsible for
|
|
|
|
* the timeout action
|
|
|
|
*/
|
|
|
|
wait_for_completion(&lsop->ls_done);
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
__nvme_fc_finish_ls_req(lsop);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
ret = lsop->ls_error;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/* ACC or RJT payload ? */
|
|
|
|
if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
/*
 * Send an LS request without waiting for it: @done is called when the
 * request completes. Returns the result of __nvme_fc_send_ls_req().
 */
static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	int ret;

	/* don't wait for completion */
	ret = __nvme_fc_send_ls_req(rport, lsop, done);

	return ret;
}
|
|
|
|
|
|
|
|
/* Validation Error indexes into the string table below */
enum {
	VERR_NO_ERROR = 0,
	VERR_LSACC,
	VERR_LSDESC_RQST,
	VERR_LSDESC_RQST_LEN,
	VERR_ASSOC_ID,
	VERR_ASSOC_ID_LEN,
	VERR_CONN_ID,
	VERR_CONN_ID_LEN,
	VERR_CR_ASSOC,
	VERR_CR_ASSOC_ACC_LEN,
	VERR_CR_CONN,
	VERR_CR_CONN_ACC_LEN,
	VERR_DISCONN,
	VERR_DISCONN_ACC_LEN,
};

/* Human-readable text for each VERR_* code, indexed by the code itself */
static char *validation_errors[] = {
	[VERR_NO_ERROR]		= "OK",
	[VERR_LSACC]		= "Not LS_ACC",
	[VERR_LSDESC_RQST]	= "Not LSDESC_RQST",
	[VERR_LSDESC_RQST_LEN]	= "Bad LSDESC_RQST Length",
	[VERR_ASSOC_ID]		= "Not Association ID",
	[VERR_ASSOC_ID_LEN]	= "Bad Association ID Length",
	[VERR_CONN_ID]		= "Not Connection ID",
	[VERR_CONN_ID_LEN]	= "Bad Connection ID Length",
	[VERR_CR_ASSOC]		= "Not CR_ASSOC Rqst",
	[VERR_CR_ASSOC_ACC_LEN]	= "Bad CR_ASSOC ACC Length",
	[VERR_CR_CONN]		= "Not CR_CONN Rqst",
	[VERR_CR_CONN_ACC_LEN]	= "Bad CR_CONN ACC Length",
	[VERR_DISCONN]		= "Not Disconnect Rqst",
	[VERR_DISCONN_ACC_LEN]	= "Bad Disconnect ACC Length",
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
|
|
|
|
struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
|
|
|
|
{
|
|
|
|
struct nvmefc_ls_req_op *lsop;
|
|
|
|
struct nvmefc_ls_req *lsreq;
|
|
|
|
struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
|
|
|
|
struct fcnvme_ls_cr_assoc_acc *assoc_acc;
|
|
|
|
int ret, fcret = 0;
|
|
|
|
|
|
|
|
lsop = kzalloc((sizeof(*lsop) +
|
|
|
|
ctrl->lport->ops->lsrqst_priv_sz +
|
|
|
|
sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
|
|
|
|
if (!lsop) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out_no_memory;
|
|
|
|
}
|
|
|
|
lsreq = &lsop->ls_req;
|
|
|
|
|
|
|
|
lsreq->private = (void *)&lsop[1];
|
|
|
|
assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
|
|
|
|
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
|
|
|
|
assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
|
|
|
|
|
|
|
|
assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
|
|
|
|
assoc_rqst->desc_list_len =
|
|
|
|
cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
|
|
|
|
|
|
|
|
assoc_rqst->assoc_cmd.desc_tag =
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
|
|
|
|
assoc_rqst->assoc_cmd.desc_len =
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
|
|
|
|
|
|
|
|
assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
|
2018-03-08 07:59:36 +08:00
|
|
|
assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
|
2016-12-02 16:28:42 +08:00
|
|
|
/* Linux supports only Dynamic controllers */
|
|
|
|
assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
|
2017-05-17 15:54:27 +08:00
|
|
|
uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
|
2016-12-02 16:28:42 +08:00
|
|
|
strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
|
|
|
|
min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
|
|
|
|
strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
|
|
|
|
min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));
|
|
|
|
|
|
|
|
lsop->queue = queue;
|
|
|
|
lsreq->rqstaddr = assoc_rqst;
|
|
|
|
lsreq->rqstlen = sizeof(*assoc_rqst);
|
|
|
|
lsreq->rspaddr = assoc_acc;
|
|
|
|
lsreq->rsplen = sizeof(*assoc_acc);
|
|
|
|
lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_free_buffer;
|
|
|
|
|
|
|
|
/* process connect LS completion */
|
|
|
|
|
|
|
|
/* validate the ACC response */
|
|
|
|
if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
|
|
|
|
fcret = VERR_LSACC;
|
2017-03-24 11:41:25 +08:00
|
|
|
else if (assoc_acc->hdr.desc_list_len !=
|
2016-12-02 16:28:42 +08:00
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_ls_cr_assoc_acc)))
|
|
|
|
fcret = VERR_CR_ASSOC_ACC_LEN;
|
2017-03-24 11:41:25 +08:00
|
|
|
else if (assoc_acc->hdr.rqst.desc_tag !=
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_RQST))
|
2016-12-02 16:28:42 +08:00
|
|
|
fcret = VERR_LSDESC_RQST;
|
|
|
|
else if (assoc_acc->hdr.rqst.desc_len !=
|
|
|
|
fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
|
|
|
|
fcret = VERR_LSDESC_RQST_LEN;
|
|
|
|
else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
|
|
|
|
fcret = VERR_CR_ASSOC;
|
|
|
|
else if (assoc_acc->associd.desc_tag !=
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
|
|
|
|
fcret = VERR_ASSOC_ID;
|
|
|
|
else if (assoc_acc->associd.desc_len !=
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_assoc_id)))
|
|
|
|
fcret = VERR_ASSOC_ID_LEN;
|
|
|
|
else if (assoc_acc->connectid.desc_tag !=
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_CONN_ID))
|
|
|
|
fcret = VERR_CONN_ID;
|
|
|
|
else if (assoc_acc->connectid.desc_len !=
|
|
|
|
fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
|
|
|
|
fcret = VERR_CONN_ID_LEN;
|
|
|
|
|
|
|
|
if (fcret) {
|
|
|
|
ret = -EBADF;
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"q %d connect failed: %s\n",
|
|
|
|
queue->qnum, validation_errors[fcret]);
|
|
|
|
} else {
|
|
|
|
ctrl->association_id =
|
|
|
|
be64_to_cpu(assoc_acc->associd.association_id);
|
|
|
|
queue->connection_id =
|
|
|
|
be64_to_cpu(assoc_acc->connectid.connection_id);
|
|
|
|
set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
out_free_buffer:
|
|
|
|
kfree(lsop);
|
|
|
|
out_no_memory:
|
|
|
|
if (ret)
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"queue %d connect admin queue failed (%d).\n",
|
|
|
|
queue->qnum, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
|
|
|
|
u16 qsize, u16 ersp_ratio)
|
|
|
|
{
|
|
|
|
struct nvmefc_ls_req_op *lsop;
|
|
|
|
struct nvmefc_ls_req *lsreq;
|
|
|
|
struct fcnvme_ls_cr_conn_rqst *conn_rqst;
|
|
|
|
struct fcnvme_ls_cr_conn_acc *conn_acc;
|
|
|
|
int ret, fcret = 0;
|
|
|
|
|
|
|
|
lsop = kzalloc((sizeof(*lsop) +
|
|
|
|
ctrl->lport->ops->lsrqst_priv_sz +
|
|
|
|
sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
|
|
|
|
if (!lsop) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out_no_memory;
|
|
|
|
}
|
|
|
|
lsreq = &lsop->ls_req;
|
|
|
|
|
|
|
|
lsreq->private = (void *)&lsop[1];
|
|
|
|
conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
|
|
|
|
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
|
|
|
|
conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
|
|
|
|
|
|
|
|
conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
|
|
|
|
conn_rqst->desc_list_len = cpu_to_be32(
|
|
|
|
sizeof(struct fcnvme_lsdesc_assoc_id) +
|
|
|
|
sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
|
|
|
|
|
|
|
|
conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
|
|
|
|
conn_rqst->associd.desc_len =
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_assoc_id));
|
|
|
|
conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
|
|
|
|
conn_rqst->connect_cmd.desc_tag =
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
|
|
|
|
conn_rqst->connect_cmd.desc_len =
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
|
|
|
|
conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
|
|
|
|
conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
|
2018-03-08 07:59:36 +08:00
|
|
|
conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
lsop->queue = queue;
|
|
|
|
lsreq->rqstaddr = conn_rqst;
|
|
|
|
lsreq->rqstlen = sizeof(*conn_rqst);
|
|
|
|
lsreq->rspaddr = conn_acc;
|
|
|
|
lsreq->rsplen = sizeof(*conn_acc);
|
|
|
|
lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_free_buffer;
|
|
|
|
|
|
|
|
/* process connect LS completion */
|
|
|
|
|
|
|
|
/* validate the ACC response */
|
|
|
|
if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
|
|
|
|
fcret = VERR_LSACC;
|
2017-03-24 11:41:25 +08:00
|
|
|
else if (conn_acc->hdr.desc_list_len !=
|
2016-12-02 16:28:42 +08:00
|
|
|
fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
|
|
|
|
fcret = VERR_CR_CONN_ACC_LEN;
|
2017-03-24 11:41:25 +08:00
|
|
|
else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
|
2016-12-02 16:28:42 +08:00
|
|
|
fcret = VERR_LSDESC_RQST;
|
|
|
|
else if (conn_acc->hdr.rqst.desc_len !=
|
|
|
|
fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
|
|
|
|
fcret = VERR_LSDESC_RQST_LEN;
|
|
|
|
else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
|
|
|
|
fcret = VERR_CR_CONN;
|
|
|
|
else if (conn_acc->connectid.desc_tag !=
|
|
|
|
cpu_to_be32(FCNVME_LSDESC_CONN_ID))
|
|
|
|
fcret = VERR_CONN_ID;
|
|
|
|
else if (conn_acc->connectid.desc_len !=
|
|
|
|
fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
|
|
|
|
fcret = VERR_CONN_ID_LEN;
|
|
|
|
|
|
|
|
if (fcret) {
|
|
|
|
ret = -EBADF;
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"q %d connect failed: %s\n",
|
|
|
|
queue->qnum, validation_errors[fcret]);
|
|
|
|
} else {
|
|
|
|
queue->connection_id =
|
|
|
|
be64_to_cpu(conn_acc->connectid.connection_id);
|
|
|
|
set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
out_free_buffer:
|
|
|
|
kfree(lsop);
|
|
|
|
out_no_memory:
|
|
|
|
if (ret)
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"queue %d connect command failed (%d).\n",
|
|
|
|
queue->qnum, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Completion callback for the Disconnect Association LS: finish the LS
 * request and free the op that was allocated when it was sent.
 */
static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *op = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(op);

	/*
	 * @status is intentionally ignored: the fc-nvme initiator doesn't
	 * care about success or failure of the command.
	 */
	kfree(op);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This routine sends a FC-NVME LS to disconnect (aka terminate)
|
|
|
|
* the FC-NVME Association. Terminating the association also
|
|
|
|
* terminates the FC-NVME connections (per queue, both admin and io
|
|
|
|
* queues) that are part of the association. E.g. things are torn
|
|
|
|
* down, and the related FC-NVME Association ID and Connection IDs
|
|
|
|
* become invalid.
|
|
|
|
*
|
|
|
|
* The behavior of the fc-nvme initiator is such that it's
|
|
|
|
* understanding of the association and connections will implicitly
|
|
|
|
* be torn down. The action is implicit as it may be due to a loss of
|
|
|
|
* connectivity with the fc-nvme target, so you may never get a
|
|
|
|
* response even if you tried. As such, the action of this routine
|
|
|
|
* is to asynchronously send the LS, ignore any results of the LS, and
|
|
|
|
* continue on with terminating the association. If the fc-nvme target
|
|
|
|
* is present and receives the LS, it too can tear down.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct fcnvme_ls_disconnect_rqst *discon_rqst;
|
|
|
|
struct fcnvme_ls_disconnect_acc *discon_acc;
|
|
|
|
struct nvmefc_ls_req_op *lsop;
|
|
|
|
struct nvmefc_ls_req *lsreq;
|
2017-04-12 02:35:08 +08:00
|
|
|
int ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
lsop = kzalloc((sizeof(*lsop) +
|
|
|
|
ctrl->lport->ops->lsrqst_priv_sz +
|
|
|
|
sizeof(*discon_rqst) + sizeof(*discon_acc)),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!lsop)
|
|
|
|
/* couldn't sent it... too bad */
|
|
|
|
return;
|
|
|
|
|
|
|
|
lsreq = &lsop->ls_req;
|
|
|
|
|
|
|
|
lsreq->private = (void *)&lsop[1];
|
|
|
|
discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
|
|
|
|
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
|
|
|
|
discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
|
|
|
|
|
|
|
|
discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
|
|
|
|
discon_rqst->desc_list_len = cpu_to_be32(
|
|
|
|
sizeof(struct fcnvme_lsdesc_assoc_id) +
|
|
|
|
sizeof(struct fcnvme_lsdesc_disconn_cmd));
|
|
|
|
|
|
|
|
discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
|
|
|
|
discon_rqst->associd.desc_len =
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_assoc_id));
|
|
|
|
|
|
|
|
discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
|
|
|
|
|
|
|
|
discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
|
|
|
|
FCNVME_LSDESC_DISCONN_CMD);
|
|
|
|
discon_rqst->discon_cmd.desc_len =
|
|
|
|
fcnvme_lsdesc_len(
|
|
|
|
sizeof(struct fcnvme_lsdesc_disconn_cmd));
|
|
|
|
discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
|
|
|
|
discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
|
|
|
|
|
|
|
|
lsreq->rqstaddr = discon_rqst;
|
|
|
|
lsreq->rqstlen = sizeof(*discon_rqst);
|
|
|
|
lsreq->rspaddr = discon_acc;
|
|
|
|
lsreq->rsplen = sizeof(*discon_acc);
|
|
|
|
lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
|
|
|
|
|
2017-04-12 02:35:08 +08:00
|
|
|
ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
|
|
|
|
nvme_fc_disconnect_assoc_done);
|
|
|
|
if (ret)
|
|
|
|
kfree(lsop);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/* only meaningful part to terminating the association */
|
|
|
|
ctrl->association_id = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* *********************** NVME Ctrl Routines **************************** */
|
|
|
|
|
2017-06-02 13:54:21 +08:00
|
|
|
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
static void
|
|
|
|
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
|
|
|
|
struct nvme_fc_fcp_op *op)
|
|
|
|
{
|
|
|
|
fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma,
|
|
|
|
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
|
|
|
|
fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma,
|
|
|
|
sizeof(op->cmd_iu), DMA_TO_DEVICE);
|
|
|
|
|
|
|
|
atomic_set(&op->state, FCPOP_STATE_UNINIT);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-05-02 00:19:08 +08:00
|
|
|
nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
|
|
|
|
unsigned int hctx_idx)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
|
|
|
|
2017-05-02 00:19:08 +08:00
|
|
|
return __nvme_fc_exit_request(set->driver_data, op);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
static int
|
|
|
|
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
|
|
|
|
{
|
2018-02-06 22:48:29 +08:00
|
|
|
unsigned long flags;
|
|
|
|
int opstate;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ctrl->lock, flags);
|
|
|
|
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
|
|
|
|
if (opstate != FCPOP_STATE_ACTIVE)
|
|
|
|
atomic_set(&op->state, opstate);
|
|
|
|
else if (ctrl->flags & FCCTRL_TERMIO)
|
|
|
|
ctrl->iocnt++;
|
|
|
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
2017-04-23 23:30:07 +08:00
|
|
|
|
2018-02-06 22:48:29 +08:00
|
|
|
if (opstate != FCPOP_STATE_ACTIVE)
|
2017-04-23 23:30:07 +08:00
|
|
|
return -ECANCELED;
|
|
|
|
|
|
|
|
ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
|
|
|
|
&ctrl->rport->remoteport,
|
|
|
|
op->queue->lldd_handle,
|
|
|
|
&op->fcp_req);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
static void
|
2017-04-23 23:30:07 +08:00
|
|
|
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
|
2018-02-06 22:48:29 +08:00
|
|
|
int i;
|
2017-04-23 23:30:07 +08:00
|
|
|
|
2018-11-15 08:35:10 +08:00
|
|
|
/* ensure we've initialized the ops once */
|
|
|
|
if (!(aen_op->flags & FCOP_FLAGS_AEN))
|
|
|
|
return;
|
|
|
|
|
2018-02-06 22:48:29 +08:00
|
|
|
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
|
|
|
|
__nvme_fc_abort_op(ctrl, aen_op);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2018-02-06 22:48:30 +08:00
|
|
|
static inline void
|
2017-04-23 23:30:07 +08:00
|
|
|
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
|
2018-02-06 22:48:29 +08:00
|
|
|
struct nvme_fc_fcp_op *op, int opstate)
|
2017-04-23 23:30:07 +08:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2018-02-06 22:48:30 +08:00
|
|
|
if (opstate == FCPOP_STATE_ABORTED) {
|
|
|
|
spin_lock_irqsave(&ctrl->lock, flags);
|
|
|
|
if (ctrl->flags & FCCTRL_TERMIO) {
|
|
|
|
if (!--ctrl->iocnt)
|
|
|
|
wake_up(&ctrl->ioabort_wait);
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
2017-04-23 23:30:08 +08:00
|
|
|
}
|
2017-04-23 23:30:07 +08:00
|
|
|
}
|
|
|
|
|
2017-04-21 16:44:06 +08:00
|
|
|
static void
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
|
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
|
|
|
|
struct request *rq = op->rq;
|
|
|
|
struct nvmefc_fcp_req *freq = &op->fcp_req;
|
|
|
|
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
|
|
|
struct nvme_fc_queue *queue = op->queue;
|
|
|
|
struct nvme_completion *cqe = &op->rsp_iu.cqe;
|
2017-04-23 23:30:06 +08:00
|
|
|
struct nvme_command *sqe = &op->cmd_iu.sqe;
|
2017-04-20 22:02:56 +08:00
|
|
|
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
|
2017-04-20 22:02:57 +08:00
|
|
|
union nvme_result result;
|
2017-10-20 07:11:38 +08:00
|
|
|
bool terminate_assoc = true;
|
2018-02-06 22:48:29 +08:00
|
|
|
int opstate;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* WARNING:
|
|
|
|
* The current linux implementation of a nvme controller
|
|
|
|
* allocates a single tag set for all io queues and sizes
|
|
|
|
* the io queues to fully hold all possible tags. Thus, the
|
|
|
|
* implementation does not reference or care about the sqhd
|
|
|
|
* value as it never needs to use the sqhd/sqtail pointers
|
|
|
|
* for submission pacing.
|
|
|
|
*
|
|
|
|
* This affects the FC-NVME implementation in two ways:
|
|
|
|
* 1) As the value doesn't matter, we don't need to waste
|
|
|
|
* cycles extracting it from ERSPs and stamping it in the
|
|
|
|
* cases where the transport fabricates CQEs on successful
|
|
|
|
* completions.
|
|
|
|
* 2) The FC-NVME implementation requires that delivery of
|
|
|
|
* ERSP completions are to go back to the nvme layer in order
|
|
|
|
* relative to the rsn, such that the sqhd value will always
|
|
|
|
* be "in order" for the nvme layer. As the nvme layer in
|
|
|
|
* linux doesn't care about sqhd, there's no need to return
|
|
|
|
* them in order.
|
|
|
|
*
|
|
|
|
* Additionally:
|
|
|
|
* As the core nvme layer in linux currently does not look at
|
|
|
|
* every field in the cqe - in cases where the FC transport must
|
|
|
|
* fabricate a CQE, the following fields will not be set as they
|
|
|
|
* are not referenced:
|
|
|
|
* cqe.sqid, cqe.sqhd, cqe.command_id
|
2017-06-02 13:54:21 +08:00
|
|
|
*
|
|
|
|
* Failure or error of an individual i/o, in a transport
|
|
|
|
* detected fashion unrelated to the nvme completion status,
|
|
|
|
* potentially cause the initiator and target sides to get out
|
|
|
|
* of sync on SQ head/tail (aka outstanding io count allowed).
|
|
|
|
* Per FC-NVME spec, failure of an individual command requires
|
|
|
|
* the connection to be terminated, which in turn requires the
|
|
|
|
* association to be terminated.
|
2016-12-02 16:28:42 +08:00
|
|
|
*/
|
|
|
|
|
2018-02-06 22:48:29 +08:00
|
|
|
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
|
|
|
|
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
|
|
|
|
|
2018-02-06 22:48:29 +08:00
|
|
|
if (opstate == FCPOP_STATE_ABORTED)
|
2019-08-06 15:14:06 +08:00
|
|
|
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
|
|
|
|
else if (freq->status) {
|
|
|
|
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: io failed due to lldd error %d\n",
|
|
|
|
ctrl->cnum, freq->status);
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For the linux implementation, if we have an unsuccesful
|
|
|
|
* status, they blk-mq layer can typically be called with the
|
|
|
|
* non-zero status and the content of the cqe isn't important.
|
|
|
|
*/
|
|
|
|
if (status)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* command completed successfully relative to the wire
|
|
|
|
* protocol. However, validate anything received and
|
|
|
|
* extract the status and result from the cqe (create it
|
|
|
|
* where necessary).
|
|
|
|
*/
|
|
|
|
|
|
|
|
switch (freq->rcv_rsplen) {
|
|
|
|
|
|
|
|
case 0:
|
|
|
|
case NVME_FC_SIZEOF_ZEROS_RSP:
|
|
|
|
/*
|
|
|
|
* No response payload or 12 bytes of payload (which
|
|
|
|
* should all be zeros) are considered successful and
|
|
|
|
* no payload in the CQE by the transport.
|
|
|
|
*/
|
|
|
|
if (freq->transferred_length !=
|
2019-08-06 15:14:06 +08:00
|
|
|
be32_to_cpu(op->cmd_iu.data_len)) {
|
|
|
|
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: io failed due to bad transfer "
|
|
|
|
"length: %d vs expected %d\n",
|
|
|
|
ctrl->cnum, freq->transferred_length,
|
|
|
|
be32_to_cpu(op->cmd_iu.data_len));
|
2016-12-02 16:28:42 +08:00
|
|
|
goto done;
|
|
|
|
}
|
2017-04-20 22:02:57 +08:00
|
|
|
result.u64 = 0;
|
2016-12-02 16:28:42 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case sizeof(struct nvme_fc_ersp_iu):
|
|
|
|
/*
|
|
|
|
* The ERSP IU contains a full completion with CQE.
|
|
|
|
* Validate ERSP IU and look at cqe.
|
|
|
|
*/
|
|
|
|
if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
|
|
|
|
(freq->rcv_rsplen / 4) ||
|
|
|
|
be32_to_cpu(op->rsp_iu.xfrd_len) !=
|
|
|
|
freq->transferred_length ||
|
2017-03-24 11:41:23 +08:00
|
|
|
op->rsp_iu.status_code ||
|
2017-04-23 23:30:06 +08:00
|
|
|
sqe->common.command_id != cqe->command_id)) {
|
2019-08-06 15:14:06 +08:00
|
|
|
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
|
|
|
|
"iu len %d, xfr len %d vs %d, status code "
|
|
|
|
"%d, cmdid %d vs %d\n",
|
|
|
|
ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
|
|
|
|
be32_to_cpu(op->rsp_iu.xfrd_len),
|
|
|
|
freq->transferred_length,
|
|
|
|
op->rsp_iu.status_code,
|
|
|
|
sqe->common.command_id,
|
|
|
|
cqe->command_id);
|
2016-12-02 16:28:42 +08:00
|
|
|
goto done;
|
|
|
|
}
|
2017-04-20 22:02:57 +08:00
|
|
|
result = cqe->result;
|
2017-04-20 22:02:56 +08:00
|
|
|
status = cqe->status;
|
2016-12-02 16:28:42 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2019-08-06 15:14:06 +08:00
|
|
|
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
|
|
|
|
"len %d\n",
|
|
|
|
ctrl->cnum, freq->rcv_rsplen);
|
2016-12-02 16:28:42 +08:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2017-06-02 13:54:21 +08:00
|
|
|
terminate_assoc = false;
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
done:
|
2017-04-23 23:30:07 +08:00
|
|
|
if (op->flags & FCOP_FLAGS_AEN) {
|
2017-04-20 22:02:57 +08:00
|
|
|
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
|
2018-02-06 22:48:29 +08:00
|
|
|
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
|
2017-04-23 23:30:07 +08:00
|
|
|
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
|
|
|
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_ctrl_put(ctrl);
|
2017-06-02 13:54:21 +08:00
|
|
|
goto check_error;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2018-02-06 22:48:30 +08:00
|
|
|
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
|
|
|
|
nvme_end_request(rq, status, result);
|
2017-06-02 13:54:21 +08:00
|
|
|
|
|
|
|
check_error:
|
|
|
|
if (terminate_assoc)
|
|
|
|
nvme_fc_error_recovery(ctrl, "transport detected io error");
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
|
|
|
|
struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
|
|
|
|
struct request *rq, u32 rqno)
|
|
|
|
{
|
2018-10-09 05:28:45 +08:00
|
|
|
struct nvme_fcp_op_w_sgl *op_w_sgl =
|
|
|
|
container_of(op, typeof(*op_w_sgl), op);
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
memset(op, 0, sizeof(*op));
|
|
|
|
op->fcp_req.cmdaddr = &op->cmd_iu;
|
|
|
|
op->fcp_req.cmdlen = sizeof(op->cmd_iu);
|
|
|
|
op->fcp_req.rspaddr = &op->rsp_iu;
|
|
|
|
op->fcp_req.rsplen = sizeof(op->rsp_iu);
|
|
|
|
op->fcp_req.done = nvme_fc_fcpio_done;
|
|
|
|
op->ctrl = ctrl;
|
|
|
|
op->queue = queue;
|
|
|
|
op->rq = rq;
|
|
|
|
op->rqno = rqno;
|
|
|
|
|
|
|
|
cmdiu->scsi_id = NVME_CMD_SCSI_ID;
|
|
|
|
cmdiu->fc_id = NVME_CMD_FC_ID;
|
|
|
|
cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
|
|
|
|
|
|
|
|
op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
|
|
|
|
&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
|
|
|
|
if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"FCP Op failed - cmdiu dma mapping failed.\n");
|
|
|
|
ret = EFAULT;
|
|
|
|
goto out_on_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
|
|
|
|
&op->rsp_iu, sizeof(op->rsp_iu),
|
|
|
|
DMA_FROM_DEVICE);
|
|
|
|
if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
|
|
|
|
dev_err(ctrl->dev,
|
|
|
|
"FCP Op failed - rspiu dma mapping failed.\n");
|
|
|
|
ret = EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
|
|
|
out_on_error:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2017-05-02 00:19:08 +08:00
|
|
|
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
|
|
|
|
unsigned int hctx_idx, unsigned int numa_node)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
2017-05-02 00:19:08 +08:00
|
|
|
struct nvme_fc_ctrl *ctrl = set->driver_data;
|
2018-10-09 05:28:45 +08:00
|
|
|
struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
|
2017-06-13 15:15:20 +08:00
|
|
|
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
|
|
|
|
struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
|
2018-10-09 05:28:46 +08:00
|
|
|
int res;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-10-09 05:28:46 +08:00
|
|
|
res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
op->op.fcp_req.first_sgl = &op->sgl[0];
|
2018-10-28 03:41:54 +08:00
|
|
|
op->op.fcp_req.private = &op->priv[0];
|
2018-11-27 01:01:30 +08:00
|
|
|
nvme_req(rq)->ctrl = &ctrl->ctrl;
|
2018-10-09 05:28:46 +08:00
|
|
|
return res;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *aen_op;
|
|
|
|
struct nvme_fc_cmd_iu *cmdiu;
|
|
|
|
struct nvme_command *sqe;
|
2017-04-23 23:30:08 +08:00
|
|
|
void *private;
|
2016-12-02 16:28:42 +08:00
|
|
|
int i, ret;
|
|
|
|
|
|
|
|
aen_op = ctrl->aen_ops;
|
2017-11-08 06:13:10 +08:00
|
|
|
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
|
2017-04-23 23:30:08 +08:00
|
|
|
private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!private)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
cmdiu = &aen_op->cmd_iu;
|
|
|
|
sqe = &cmdiu->sqe;
|
|
|
|
ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
|
|
|
|
aen_op, (struct request *)NULL,
|
2017-11-08 06:13:10 +08:00
|
|
|
(NVME_AQ_BLK_MQ_DEPTH + i));
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret) {
|
|
|
|
kfree(private);
|
2016-12-02 16:28:42 +08:00
|
|
|
return ret;
|
2017-04-23 23:30:08 +08:00
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
aen_op->flags = FCOP_FLAGS_AEN;
|
2017-04-23 23:30:08 +08:00
|
|
|
aen_op->fcp_req.private = private;
|
2017-04-23 23:30:07 +08:00
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
memset(sqe, 0, sizeof(*sqe));
|
|
|
|
sqe->common.opcode = nvme_admin_async_event;
|
2017-04-23 23:30:07 +08:00
|
|
|
/* Note: core layer may overwrite the sqe.command_id value */
|
2017-11-08 06:13:10 +08:00
|
|
|
sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *aen_op;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
aen_op = ctrl->aen_ops;
|
2017-11-08 06:13:10 +08:00
|
|
|
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
|
2017-04-23 23:30:08 +08:00
|
|
|
if (!aen_op->fcp_req.private)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
__nvme_fc_exit_request(ctrl, aen_op);
|
|
|
|
|
|
|
|
kfree(aen_op->fcp_req.private);
|
|
|
|
aen_op->fcp_req.private = NULL;
|
|
|
|
}
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
static inline void
|
|
|
|
__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
|
|
|
|
unsigned int qidx)
|
|
|
|
{
|
|
|
|
struct nvme_fc_queue *queue = &ctrl->queues[qidx];
|
|
|
|
|
|
|
|
hctx->driver_data = queue;
|
|
|
|
queue->hctx = hctx;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Hardware-context setup that binds hardware context @hctx_idx to queue
 * index @hctx_idx + 1.  @data is the controller.  Always returns 0.
 */
static int
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	/* skip over index 0 */
	unsigned int qidx = hctx_idx + 1;

	__nvme_fc_init_hctx(hctx, (struct nvme_fc_ctrl *)data, qidx);

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Hardware-context setup that binds hardware context @hctx_idx to the
 * queue with the same index.  @data is the controller.  Always returns 0.
 */
static int
nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	__nvme_fc_init_hctx(hctx, (struct nvme_fc_ctrl *)data, hctx_idx);

	return 0;
}
|
|
|
|
|
|
|
|
static void
|
2017-11-08 06:13:11 +08:00
|
|
|
nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_queue *queue;
|
|
|
|
|
|
|
|
queue = &ctrl->queues[idx];
|
|
|
|
memset(queue, 0, sizeof(*queue));
|
|
|
|
queue->ctrl = ctrl;
|
|
|
|
queue->qnum = idx;
|
2019-04-09 02:15:19 +08:00
|
|
|
atomic_set(&queue->csn, 0);
|
2016-12-02 16:28:42 +08:00
|
|
|
queue->dev = ctrl->dev;
|
|
|
|
|
|
|
|
if (idx > 0)
|
|
|
|
queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
|
|
|
|
else
|
|
|
|
queue->cmnd_capsule_len = sizeof(struct nvme_command);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Considered whether we should allocate buffers for all SQEs
|
|
|
|
* and CQEs and dma map them - mapping their respective entries
|
|
|
|
* into the request structures (kernel vm addr and dma address)
|
|
|
|
* thus the driver could use the buffers/mappings directly.
|
|
|
|
* It only makes sense if the LLDD would use them for its
|
|
|
|
* messaging api. It's very unlikely most adapter api's would use
|
|
|
|
* a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
|
|
|
|
* structures were used instead.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This routine terminates a queue at the transport level.
|
|
|
|
* The transport has already ensured that all outstanding ios on
|
|
|
|
* the queue have been terminated.
|
|
|
|
* The transport will send a Disconnect LS request to terminate
|
|
|
|
* the queue's connection. Termination of the admin queue will also
|
|
|
|
* terminate the association at the target.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
nvme_fc_free_queue(struct nvme_fc_queue *queue)
|
|
|
|
{
|
|
|
|
if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
|
|
|
|
return;
|
|
|
|
|
2017-10-24 20:25:21 +08:00
|
|
|
clear_bit(NVME_FC_Q_LIVE, &queue->flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
|
|
|
|
* Current implementation never disconnects a single queue.
|
|
|
|
* It always terminates a whole association. So there is never
|
|
|
|
* a disconnect(queue) LS sent to the target.
|
|
|
|
*/
|
|
|
|
|
|
|
|
queue->connection_id = 0;
|
2019-04-09 02:15:19 +08:00
|
|
|
atomic_set(&queue->csn, 0);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
|
|
|
|
struct nvme_fc_queue *queue, unsigned int qidx)
|
|
|
|
{
|
|
|
|
if (ctrl->lport->ops->delete_queue)
|
|
|
|
ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
|
|
|
|
queue->lldd_handle);
|
|
|
|
queue->lldd_handle = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++)
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_free_queue(&ctrl->queues[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
|
|
|
|
struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
queue->lldd_handle = NULL;
|
|
|
|
if (ctrl->lport->ops->create_queue)
|
|
|
|
ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
|
|
|
|
qidx, qsize, &queue->lldd_handle);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
2017-04-24 15:58:29 +08:00
|
|
|
struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
|
2016-12-02 16:28:42 +08:00
|
|
|
int i;
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
|
2016-12-02 16:28:42 +08:00
|
|
|
__nvme_fc_delete_hw_queue(ctrl, queue, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
|
|
|
|
{
|
|
|
|
struct nvme_fc_queue *queue = &ctrl->queues[1];
|
2016-12-15 21:20:48 +08:00
|
|
|
int i, ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
|
2016-12-02 16:28:42 +08:00
|
|
|
ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
|
2016-12-15 21:20:48 +08:00
|
|
|
if (ret)
|
|
|
|
goto delete_queues;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2016-12-15 21:20:48 +08:00
|
|
|
|
|
|
|
delete_queues:
|
|
|
|
for (; i >= 0; i--)
|
|
|
|
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
|
|
|
|
return ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
|
|
|
|
{
|
|
|
|
int i, ret = 0;
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
|
2016-12-02 16:28:42 +08:00
|
|
|
ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
|
|
|
|
(qsize / 5));
|
|
|
|
if (ret)
|
|
|
|
break;
|
2018-12-15 03:06:08 +08:00
|
|
|
ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
|
|
|
break;
|
2017-10-24 20:25:21 +08:00
|
|
|
|
|
|
|
set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++)
|
2017-11-08 06:13:11 +08:00
|
|
|
nvme_fc_init_queue(ctrl, i);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_ctrl_free(struct kref *ref)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl =
|
|
|
|
container_of(ref, struct nvme_fc_ctrl, ref);
|
|
|
|
unsigned long flags;
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ctrl->ctrl.tagset) {
|
|
|
|
blk_cleanup_queue(ctrl->ctrl.connect_q);
|
|
|
|
blk_mq_free_tag_set(&ctrl->tag_set);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/* remove from rport list */
|
|
|
|
spin_lock_irqsave(&ctrl->rport->lock, flags);
|
|
|
|
list_del(&ctrl->ctrl_list);
|
|
|
|
spin_unlock_irqrestore(&ctrl->rport->lock, flags);
|
|
|
|
|
2017-07-02 20:39:34 +08:00
|
|
|
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
2017-04-23 23:30:08 +08:00
|
|
|
blk_cleanup_queue(ctrl->ctrl.admin_q);
|
2019-08-03 10:33:59 +08:00
|
|
|
blk_cleanup_queue(ctrl->ctrl.fabrics_q);
|
2017-04-23 23:30:08 +08:00
|
|
|
blk_mq_free_tag_set(&ctrl->admin_tag_set);
|
|
|
|
|
|
|
|
kfree(ctrl->queues);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
put_device(ctrl->dev);
|
|
|
|
nvme_fc_rport_put(ctrl->rport);
|
|
|
|
|
|
|
|
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
|
2017-04-25 01:24:16 +08:00
|
|
|
if (ctrl->ctrl.opts)
|
|
|
|
nvmf_free_options(ctrl->ctrl.opts);
|
2016-12-02 16:28:42 +08:00
|
|
|
kfree(ctrl);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
kref_put(&ctrl->ref, nvme_fc_ctrl_free);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
return kref_get_unless_zero(&ctrl->ref);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All accesses from nvme core layer done - can now free the
|
|
|
|
* controller. Called after last nvme_put_ctrl() call
|
|
|
|
*/
|
|
|
|
static void
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
|
|
|
|
|
|
|
|
WARN_ON(nctrl != &ctrl->ctrl);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_fc_ctrl_put(ctrl);
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
|
|
|
|
{
|
2018-11-15 08:35:10 +08:00
|
|
|
int active;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if an error (io timeout, etc) while (re)connecting,
|
|
|
|
* it's an error on creating the new association.
|
|
|
|
* Start the error recovery thread if it hasn't already
|
|
|
|
* been started. It is expected there could be multiple
|
|
|
|
* ios hitting this path before things are cleaned up.
|
|
|
|
*/
|
|
|
|
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
|
|
|
|
active = atomic_xchg(&ctrl->err_work_active, 1);
|
2019-05-03 17:43:52 +08:00
|
|
|
if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
|
2018-11-15 08:35:10 +08:00
|
|
|
atomic_set(&ctrl->err_work_active, 0);
|
|
|
|
WARN_ON(1);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Otherwise, only proceed if in LIVE state - e.g. on first error */
|
2017-06-22 08:43:21 +08:00
|
|
|
if (ctrl->ctrl.state != NVME_CTRL_LIVE)
|
|
|
|
return;
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
dev_warn(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: transport association error detected: %s\n",
|
|
|
|
ctrl->cnum, errmsg);
|
2017-05-16 08:10:19 +08:00
|
|
|
dev_warn(ctrl->ctrl.device,
|
2017-04-23 23:30:08 +08:00
|
|
|
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-06-15 21:41:08 +08:00
|
|
|
nvme_reset_ctrl(&ctrl->ctrl);
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-21 16:44:06 +08:00
|
|
|
static enum blk_eh_timer_return
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_timeout(struct request *rq, bool reserved)
|
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
|
|
|
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
|
|
|
|
|
|
|
/*
|
2017-04-23 23:30:08 +08:00
|
|
|
* we can't individually ABTS an io without affecting the queue,
|
2018-03-13 00:32:22 +08:00
|
|
|
* thus killing the queue, and thus the association.
|
2017-04-23 23:30:08 +08:00
|
|
|
* So resolve by performing a controller reset, which will stop
|
|
|
|
* the host/io stack, terminate the association on the link,
|
|
|
|
* and recreate an association on the link.
|
2016-12-02 16:28:42 +08:00
|
|
|
*/
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_fc_error_recovery(ctrl, "io timeout error");
|
2016-12-02 16:28:42 +08:00
|
|
|
|
nvme-fc: correct io timeout behavior
The transport io timeout behavior wasn't quite correct. It ignored
that the io error handler is supposed to be synchronous so it possibly
allowed the blk request to be restarted while the io associated was
still aborting. Timeouts on reserved commands, those used for
association create, were never timing out thus they hung out forever.
To correct:
If an io is times out while a remoteport is not connected, just
restart the io timer. The lack of connectivity will simultaneously
be resetting the controller, so the reset path will abort and terminate
the io.
If an io is times out while it was marked for transport abort, just
reset the io timer. The abort process is underway and will complete
the io.
Otherwise, if an io times out, abort the io. If the abort was
unsuccessful (unlikely) give up and return not handled.
If the abort was successful, as the abort process is underway it will
terminate the io, so rather than synchronously waiting, just restart
the io timer.
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2017-10-20 07:11:39 +08:00
|
|
|
/*
|
|
|
|
* the io abort has been initiated. Have the reset timer
|
|
|
|
* restarted and the abort completion will complete the io
|
|
|
|
* shortly. Avoids a synchronous wait while the abort finishes.
|
|
|
|
*/
|
|
|
|
return BLK_EH_RESET_TIMER;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
|
|
|
|
struct nvme_fc_fcp_op *op)
|
|
|
|
{
|
|
|
|
struct nvmefc_fcp_req *freq = &op->fcp_req;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
freq->sg_cnt = 0;
|
|
|
|
|
2019-03-14 01:55:00 +08:00
|
|
|
if (!blk_rq_nr_phys_segments(rq))
|
2016-12-02 16:28:42 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
freq->sg_table.sgl = freq->first_sgl;
|
2017-01-19 23:55:57 +08:00
|
|
|
ret = sg_alloc_table_chained(&freq->sg_table,
|
2019-04-28 15:39:30 +08:00
|
|
|
blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
|
|
|
|
SG_CHUNK_SIZE);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
|
2017-01-19 23:55:57 +08:00
|
|
|
WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
|
2016-12-02 16:28:42 +08:00
|
|
|
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
|
2019-08-28 19:11:49 +08:00
|
|
|
op->nents, rq_dma_dir(rq));
|
2016-12-02 16:28:42 +08:00
|
|
|
if (unlikely(freq->sg_cnt <= 0)) {
|
2019-04-28 15:39:30 +08:00
|
|
|
sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
|
2016-12-02 16:28:42 +08:00
|
|
|
freq->sg_cnt = 0;
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TODO: blk_integrity_rq(rq) for DIF
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
|
|
|
|
struct nvme_fc_fcp_op *op)
|
|
|
|
{
|
|
|
|
struct nvmefc_fcp_req *freq = &op->fcp_req;
|
|
|
|
|
|
|
|
if (!freq->sg_cnt)
|
|
|
|
return;
|
|
|
|
|
|
|
|
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
|
2019-08-28 19:11:49 +08:00
|
|
|
rq_dma_dir(rq));
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
nvme_cleanup_cmd(rq);
|
|
|
|
|
2019-04-28 15:39:30 +08:00
|
|
|
sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
freq->sg_cnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In FC, the queue is a logical thing. At transport connect, the target
|
|
|
|
* creates its "queue" and returns a handle that is to be given to the
|
|
|
|
* target whenever it posts something to the corresponding SQ. When an
|
|
|
|
* SQE is sent on a SQ, FC effectively considers the SQE, or rather the
|
|
|
|
* command contained within the SQE, an io, and assigns a FC exchange
|
|
|
|
* to it. The SQE and the associated SQ handle are sent in the initial
|
|
|
|
* CMD IU sent on the exchange. All transfers relative to the io occur
|
|
|
|
* as part of the exchange. The CQE is the last thing for the io,
|
|
|
|
* which is transferred (explicitly or implicitly) with the RSP IU
|
|
|
|
* sent on the exchange. After the CQE is received, the FC exchange is
|
|
|
|
* terminated and the Exchange may be used on a different io.
|
|
|
|
*
|
|
|
|
* The transport to LLDD api has the transport making a request for a
|
|
|
|
* new fcp io request to the LLDD. The LLDD then allocates a FC exchange
|
|
|
|
* resource and transfers the command. The LLDD will then process all
|
|
|
|
* steps to complete the io. Upon completion, the transport done routine
|
|
|
|
* is called.
|
|
|
|
*
|
|
|
|
* So - while the operation is outstanding to the LLDD, there is a link
|
|
|
|
* level FC exchange resource that is also outstanding. This must be
|
|
|
|
* considered in all cleanup operations.
|
|
|
|
*/
|
2017-06-03 15:38:05 +08:00
|
|
|
static blk_status_t
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
|
|
|
|
struct nvme_fc_fcp_op *op, u32 data_len,
|
|
|
|
enum nvmefc_fcp_datadir io_dir)
|
|
|
|
{
|
|
|
|
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
|
|
|
struct nvme_command *sqe = &cmdiu->sqe;
|
2018-03-01 06:49:10 +08:00
|
|
|
int ret, opstate;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* before attempting to send the io, check to see if we believe
|
|
|
|
* the target device is present
|
|
|
|
*/
|
|
|
|
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
|
2018-01-31 11:04:57 +08:00
|
|
|
return BLK_STS_RESOURCE;
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
if (!nvme_fc_ctrl_get(ctrl))
|
2017-06-03 15:38:05 +08:00
|
|
|
return BLK_STS_IOERR;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
/* format the FC-NVME CMD IU and fcp_req */
|
|
|
|
cmdiu->connection_id = cpu_to_be64(queue->connection_id);
|
|
|
|
cmdiu->data_len = cpu_to_be32(data_len);
|
|
|
|
switch (io_dir) {
|
|
|
|
case NVMEFC_FCP_WRITE:
|
|
|
|
cmdiu->flags = FCNVME_CMD_FLAGS_WRITE;
|
|
|
|
break;
|
|
|
|
case NVMEFC_FCP_READ:
|
|
|
|
cmdiu->flags = FCNVME_CMD_FLAGS_READ;
|
|
|
|
break;
|
|
|
|
case NVMEFC_FCP_NODATA:
|
|
|
|
cmdiu->flags = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
op->fcp_req.payload_length = data_len;
|
|
|
|
op->fcp_req.io_dir = io_dir;
|
|
|
|
op->fcp_req.transferred_length = 0;
|
|
|
|
op->fcp_req.rcv_rsplen = 0;
|
2017-03-24 11:41:27 +08:00
|
|
|
op->fcp_req.status = NVME_SC_SUCCESS;
|
2016-12-02 16:28:42 +08:00
|
|
|
op->fcp_req.sqid = cpu_to_le16(queue->qnum);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* validate per fabric rules, set fields mandated by fabric spec
|
|
|
|
* as well as those by FC-NVME spec.
|
|
|
|
*/
|
|
|
|
WARN_ON_ONCE(sqe->common.metadata);
|
|
|
|
sqe->common.flags |= NVME_CMD_SGL_METABUF;
|
|
|
|
|
|
|
|
/*
|
2017-09-08 04:20:24 +08:00
|
|
|
* format SQE DPTR field per FC-NVME rules:
|
|
|
|
* type=0x5 Transport SGL Data Block Descriptor
|
|
|
|
* subtype=0xA Transport-specific value
|
|
|
|
* address=0
|
|
|
|
* length=length of the data series
|
2016-12-02 16:28:42 +08:00
|
|
|
*/
|
2017-09-08 04:20:24 +08:00
|
|
|
sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
|
|
|
|
NVME_SGL_FMT_TRANSPORT_A;
|
2016-12-02 16:28:42 +08:00
|
|
|
sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
|
|
|
|
sqe->rw.dptr.sgl.addr = 0;
|
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
if (!(op->flags & FCOP_FLAGS_AEN)) {
|
2016-12-02 16:28:42 +08:00
|
|
|
ret = nvme_fc_map_data(ctrl, op->rq, op);
|
|
|
|
if (ret < 0) {
|
|
|
|
nvme_cleanup_cmd(op->rq);
|
|
|
|
nvme_fc_ctrl_put(ctrl);
|
2017-06-03 15:38:05 +08:00
|
|
|
if (ret == -ENOMEM || ret == -EAGAIN)
|
|
|
|
return BLK_STS_RESOURCE;
|
|
|
|
return BLK_STS_IOERR;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma,
|
|
|
|
sizeof(op->cmd_iu), DMA_TO_DEVICE);
|
|
|
|
|
|
|
|
atomic_set(&op->state, FCPOP_STATE_ACTIVE);
|
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
if (!(op->flags & FCOP_FLAGS_AEN))
|
2016-12-02 16:28:42 +08:00
|
|
|
blk_mq_start_request(op->rq);
|
|
|
|
|
2019-04-09 02:15:19 +08:00
|
|
|
cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
|
2016-12-02 16:28:42 +08:00
|
|
|
ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
|
|
|
|
&ctrl->rport->remoteport,
|
|
|
|
queue->lldd_handle, &op->fcp_req);
|
|
|
|
|
|
|
|
if (ret) {
|
2019-04-09 02:15:19 +08:00
|
|
|
/*
|
|
|
|
* If the lld fails to send the command is there an issue with
|
|
|
|
* the csn value? If the command that fails is the Connect,
|
|
|
|
* no - as the connection won't be live. If it is a command
|
|
|
|
* post-connect, it's possible a gap in csn may be created.
|
|
|
|
* Does this matter? As Linux initiators don't send fused
|
|
|
|
* commands, no. The gap would exist, but as there's nothing
|
|
|
|
* that depends on csn order to be delivered on the target
|
|
|
|
* side, it shouldn't hurt. It would be difficult for a
|
|
|
|
* target to even detect the csn gap as it has no idea when the
|
|
|
|
* cmd with the csn was supposed to arrive.
|
|
|
|
*/
|
2018-03-01 06:49:10 +08:00
|
|
|
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
|
|
|
|
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
|
|
|
|
|
2017-07-19 05:29:34 +08:00
|
|
|
if (!(op->flags & FCOP_FLAGS_AEN))
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_unmap_data(ctrl, op->rq, op);
|
|
|
|
|
|
|
|
nvme_fc_ctrl_put(ctrl);
|
|
|
|
|
2017-07-19 05:29:34 +08:00
|
|
|
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
|
|
|
|
ret != -EBUSY)
|
2017-06-03 15:38:05 +08:00
|
|
|
return BLK_STS_IOERR;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-01-31 11:04:57 +08:00
|
|
|
return BLK_STS_RESOURCE;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-06-03 15:38:05 +08:00
|
|
|
return BLK_STS_OK;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-06-03 15:38:05 +08:00
|
|
|
static blk_status_t
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
|
const struct blk_mq_queue_data *bd)
|
|
|
|
{
|
|
|
|
struct nvme_ns *ns = hctx->queue->queuedata;
|
|
|
|
struct nvme_fc_queue *queue = hctx->driver_data;
|
|
|
|
struct nvme_fc_ctrl *ctrl = queue->ctrl;
|
|
|
|
struct request *rq = bd->rq;
|
|
|
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
|
|
|
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
|
|
|
struct nvme_command *sqe = &cmdiu->sqe;
|
|
|
|
enum nvmefc_fcp_datadir io_dir;
|
2018-06-11 23:34:06 +08:00
|
|
|
bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
|
2016-12-02 16:28:42 +08:00
|
|
|
u32 data_len;
|
2017-06-03 15:38:05 +08:00
|
|
|
blk_status_t ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-06-11 23:34:06 +08:00
|
|
|
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
|
|
|
|
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
|
2018-07-21 06:49:48 +08:00
|
|
|
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
|
2017-10-24 20:25:21 +08:00
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
ret = nvme_setup_cmd(ns, rq, sqe);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2019-03-14 01:55:00 +08:00
|
|
|
/*
|
|
|
|
* nvme core doesn't quite treat the rq opaquely. Commands such
|
|
|
|
* as WRITE ZEROES will return a non-zero rq payload_bytes yet
|
|
|
|
* there is no actual payload to be transferred.
|
|
|
|
* To get it right, key data transmission on there being 1 or
|
|
|
|
* more physical segments in the sg list. If there is no
|
|
|
|
* physical segments, there is no payload.
|
|
|
|
*/
|
|
|
|
if (blk_rq_nr_phys_segments(rq)) {
|
|
|
|
data_len = blk_rq_payload_bytes(rq);
|
2016-12-02 16:28:42 +08:00
|
|
|
io_dir = ((rq_data_dir(rq) == WRITE) ?
|
|
|
|
NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
|
2019-03-14 01:55:00 +08:00
|
|
|
} else {
|
|
|
|
data_len = 0;
|
2016-12-02 16:28:42 +08:00
|
|
|
io_dir = NVMEFC_FCP_NODATA;
|
2019-03-14 01:55:00 +08:00
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-11-08 06:13:12 +08:00
|
|
|
nvme_fc_submit_async_event(struct nvme_ctrl *arg)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
|
|
|
|
struct nvme_fc_fcp_op *aen_op;
|
2017-04-23 23:30:08 +08:00
|
|
|
unsigned long flags;
|
|
|
|
bool terminating = false;
|
2017-06-03 15:38:05 +08:00
|
|
|
blk_status_t ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
spin_lock_irqsave(&ctrl->lock, flags);
|
|
|
|
if (ctrl->flags & FCCTRL_TERMIO)
|
|
|
|
terminating = true;
|
|
|
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
|
|
|
|
|
|
|
if (terminating)
|
|
|
|
return;
|
|
|
|
|
2017-11-08 06:13:12 +08:00
|
|
|
aen_op = &ctrl->aen_ops[0];
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
|
|
|
|
NVMEFC_FCP_NODATA);
|
|
|
|
if (ret)
|
|
|
|
dev_err(ctrl->ctrl.device,
|
2017-11-08 06:13:12 +08:00
|
|
|
"failed async event work\n");
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-02-06 22:48:30 +08:00
|
|
|
nvme_fc_complete_rq(struct request *rq)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
|
|
|
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
nvme_fc_unmap_data(ctrl, rq, op);
|
2017-03-30 19:41:32 +08:00
|
|
|
nvme_complete_rq(rq);
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_ctrl_put(ctrl);
|
2017-04-23 23:30:07 +08:00
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
|
|
|
|
* This routine is used by the transport when it needs to find active
|
|
|
|
* io on a queue that is to be terminated. The transport uses
|
|
|
|
* blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
|
|
|
|
* this routine to kill them on a 1 by 1 basis.
|
|
|
|
*
|
|
|
|
* As FC allocates FC exchange for each io, the transport must contact
|
|
|
|
* the LLDD to terminate the exchange, thus releasing the FC exchange.
|
|
|
|
* After terminating the exchange the LLDD will call the transport's
|
|
|
|
* normal io done path for the request, but it will have an aborted
|
|
|
|
* status. The done path will return the io request back to the block
|
|
|
|
* layer with an error status.
|
|
|
|
*/
|
2018-11-09 01:24:07 +08:00
|
|
|
static bool
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
|
|
|
|
{
|
|
|
|
struct nvme_ctrl *nctrl = data;
|
|
|
|
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
|
|
|
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
|
|
|
|
|
2018-02-06 22:48:29 +08:00
|
|
|
__nvme_fc_abort_op(ctrl, op);
|
2018-11-09 01:24:07 +08:00
|
|
|
return true;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:07 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static const struct blk_mq_ops nvme_fc_mq_ops = {
|
|
|
|
.queue_rq = nvme_fc_queue_rq,
|
|
|
|
.complete = nvme_fc_complete_rq,
|
|
|
|
.init_request = nvme_fc_init_request,
|
|
|
|
.exit_request = nvme_fc_exit_request,
|
|
|
|
.init_hctx = nvme_fc_init_hctx,
|
|
|
|
.timeout = nvme_fc_timeout,
|
|
|
|
};
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static int
|
|
|
|
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
2017-04-23 23:30:08 +08:00
|
|
|
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
|
2017-06-29 16:16:49 +08:00
|
|
|
unsigned int nr_io_queues;
|
2017-04-23 23:30:08 +08:00
|
|
|
int ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-06-29 16:16:49 +08:00
|
|
|
nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
|
|
|
|
ctrl->lport->ops->max_hw_queues);
|
|
|
|
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret) {
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"set_queue_count failed: %d\n", ret);
|
|
|
|
return ret;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-06-29 16:16:49 +08:00
|
|
|
ctrl->ctrl.queue_count = nr_io_queues + 1;
|
|
|
|
if (!nr_io_queues)
|
2017-04-23 23:30:08 +08:00
|
|
|
return 0;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_fc_init_io_queues(ctrl);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
|
|
|
|
ctrl->tag_set.ops = &nvme_fc_mq_ops;
|
|
|
|
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
|
|
|
|
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
|
2018-11-16 16:22:29 +08:00
|
|
|
ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
2018-10-09 05:28:45 +08:00
|
|
|
ctrl->tag_set.cmd_size =
|
|
|
|
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
|
|
|
|
ctrl->lport->ops->fcprqst_priv_sz);
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->tag_set.driver_data = ctrl;
|
2017-04-24 15:58:29 +08:00
|
|
|
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->ctrl.tagset = &ctrl->tag_set;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
|
|
|
|
if (IS_ERR(ctrl->ctrl.connect_q)) {
|
|
|
|
ret = PTR_ERR(ctrl->ctrl.connect_q);
|
|
|
|
goto out_free_tag_set;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-03-08 07:59:36 +08:00
|
|
|
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
2017-04-23 23:30:08 +08:00
|
|
|
goto out_cleanup_blk_queue;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-03-08 07:59:36 +08:00
|
|
|
ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_delete_hw_queues;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used in
for blk-mq setup, etc. It also means that everything about the
controller is fully know before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transtion of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
ctrl->ioq_live = true;
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
return 0;
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
out_delete_hw_queues:
|
|
|
|
nvme_fc_delete_hw_io_queues(ctrl);
|
|
|
|
out_cleanup_blk_queue:
|
|
|
|
blk_cleanup_queue(ctrl->ctrl.connect_q);
|
|
|
|
out_free_tag_set:
|
|
|
|
blk_mq_free_tag_set(&ctrl->tag_set);
|
|
|
|
nvme_fc_free_io_queues(ctrl);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/* force put free routine to ignore io queues */
|
|
|
|
ctrl->ctrl.tagset = NULL;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
static int
|
2018-06-14 05:07:38 +08:00
|
|
|
nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
|
nvme-fc: reject reconnect if io queue count is reduced to zero
If:
- A successful connect has occurred with an io queue count greater than
zero and namespaces detected and running.
- An error or something occurs which causes a termination of the prior
association and then starts a reconnect,
- The reconnect then creates a new controller, but for whatever reason,
nvme_set_queue_count() results in io queue count set to zero. This
will skip io queue and tag set changes.
- But... the controller will transition to live, calling
nvme_start_ctrl, which calls nvme_start_queues(), which then releases
I/Os into the transport which then sends them to the driver.
As there are no queues, things eventually hit the driver looking for a
handle, which was cleared when the original controller was reset, and it
can't proceed. Worst case, things progress, but everything fails.
In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES)
command actually failed with a NVME_SC_INTERNAL error. For some reason,
although nvme_set_queue_count() saw the error and set io queue count to
zero, it doesn't return a failure status to the transport, which allows
the transport to continue using the controller.
Fix the problem by simply rejecting the new association if at least 1
I/O queue can't be created. The association reject will fail the
reconnect attempt and fall into the reconnect retry policy.
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-03-14 01:55:02 +08:00
|
|
|
u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
|
2017-06-29 16:16:49 +08:00
|
|
|
unsigned int nr_io_queues;
|
2016-12-02 16:28:42 +08:00
|
|
|
int ret;
|
|
|
|
|
2017-06-29 16:16:49 +08:00
|
|
|
nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
|
|
|
|
ctrl->lport->ops->max_hw_queues);
|
|
|
|
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret) {
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"set_queue_count failed: %d\n", ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
nvme-fc: reject reconnect if io queue count is reduced to zero
If:
- A successful connect has occurred with an io queue count greater than
zero and namespaces detected and running.
- An error or something occurs which causes a termination of the prior
association and then starts a reconnect,
- The reconnect then creates a new controller, but for whatever reason,
nvme_set_queue_count() results in io queue count set to zero. This
will skip io queue and tag set changes.
- But... the controller will transition to live, calling
nvme_start_ctrl, which calls nvme_start_queues(), which then releases
I/Os into the transport which then sends them to the driver.
As there are no queues, things eventually hit the driver looking for a
handle, which was cleared when the original controller was reset, and it
can't proceed. Worst case, things progress, but everything fails.
In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES)
command actually failed with a NVME_SC_INTERNAL error. For some reason,
although nvme_set_queue_count() saw the error and set io queue count to
zero, it doesn't return a failure status to the transport, which allows
the transport to continue using the controller.
Fix the problem by simply rejecting the new association if at least 1
I/O queue can't be created. The association reject will fail the
reconnect attempt and fall into the reconnect retry policy.
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-03-14 01:55:02 +08:00
|
|
|
if (!nr_io_queues && prior_ioq_cnt) {
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"Fail Reconnect: At least 1 io queue "
|
|
|
|
"required (was %d)\n", prior_ioq_cnt);
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
|
|
|
|
2017-06-29 16:16:49 +08:00
|
|
|
ctrl->ctrl.queue_count = nr_io_queues + 1;
|
2017-04-23 23:30:08 +08:00
|
|
|
/* check for io queues existing */
|
2017-04-24 15:58:29 +08:00
|
|
|
if (ctrl->ctrl.queue_count == 1)
|
2016-12-02 16:28:42 +08:00
|
|
|
return 0;
|
|
|
|
|
2018-03-08 07:59:36 +08:00
|
|
|
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
2017-04-23 23:30:08 +08:00
|
|
|
goto out_free_io_queues;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-03-08 07:59:36 +08:00
|
|
|
ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_delete_hw_queues;
|
|
|
|
|
nvme-fc: reject reconnect if io queue count is reduced to zero
If:
- A successful connect has occurred with an io queue count greater than
zero and namespaces detected and running.
- An error or something occurs which causes a termination of the prior
association and then starts a reconnect,
- The reconnect then creates a new controller, but for whatever reason,
nvme_set_queue_count() results in io queue count set to zero. This
will skip io queue and tag set changes.
- But... the controller will transition to live, calling
nvme_start_ctrl, which calls nvme_start_queues(), which then releases
I/Os into the transport which then sends them to the driver.
As there are no queues, things eventually hit the driver looking for a
handle, which was cleared when the original controller was reset, and it
can't proceed. Worst case, things progress, but everything fails.
In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES)
command actually failed with a NVME_SC_INTERNAL error. For some reason,
although nvme_set_queue_count() saw the error and set io queue count to
zero, it doesn't return a failure status to the transport, which allows
the transport to continue using the controller.
Fix the problem by simply rejecting the new association if at least 1
I/O queue can't be created. The association reject will fail the
reconnect attempt and fall into the reconnect retry policy.
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-03-14 01:55:02 +08:00
|
|
|
if (prior_ioq_cnt != nr_io_queues)
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"reconnect: revising io queue count from %d to %d\n",
|
|
|
|
prior_ioq_cnt, nr_io_queues);
|
2017-06-29 16:20:10 +08:00
|
|
|
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_delete_hw_queues:
|
|
|
|
nvme_fc_delete_hw_io_queues(ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
out_free_io_queues:
|
2016-12-02 16:28:42 +08:00
|
|
|
nvme_fc_free_io_queues(ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport = rport->lport;
|
|
|
|
|
|
|
|
atomic_inc(&lport->act_rport_cnt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport = rport->lport;
|
|
|
|
u32 cnt;
|
|
|
|
|
|
|
|
cnt = atomic_dec_return(&lport->act_rport_cnt);
|
|
|
|
if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED)
|
|
|
|
lport->ops->localport_delete(&lport->localport);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport = ctrl->rport;
|
|
|
|
u32 cnt;
|
|
|
|
|
|
|
|
if (ctrl->assoc_active)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
ctrl->assoc_active = true;
|
|
|
|
cnt = atomic_inc_return(&rport->act_ctrl_cnt);
|
|
|
|
if (cnt == 1)
|
|
|
|
nvme_fc_rport_active_on_lport(rport);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct nvme_fc_rport *rport = ctrl->rport;
|
|
|
|
struct nvme_fc_lport *lport = rport->lport;
|
|
|
|
u32 cnt;
|
|
|
|
|
|
|
|
/* ctrl->assoc_active=false will be set independently */
|
|
|
|
|
|
|
|
cnt = atomic_dec_return(&rport->act_ctrl_cnt);
|
|
|
|
if (cnt == 0) {
|
|
|
|
if (rport->remoteport.port_state == FC_OBJSTATE_DELETED)
|
|
|
|
lport->ops->remoteport_delete(&rport->remoteport);
|
|
|
|
nvme_fc_rport_inactive_on_lport(rport);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* This routine restarts the controller on the host side, and
|
|
|
|
* on the link side, recreates the controller association.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
|
|
|
|
int ret;
|
|
|
|
bool changed;
|
|
|
|
|
2017-05-04 18:33:15 +08:00
|
|
|
++ctrl->ctrl.nr_reconnects;
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2017-10-26 07:43:16 +08:00
|
|
|
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
|
|
|
|
return -ENODEV;
|
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
if (nvme_fc_ctlr_active_on_rport(ctrl))
|
|
|
|
return -ENOTUNIQ;
|
|
|
|
|
2019-05-30 06:25:26 +08:00
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: create association : host wwpn 0x%016llx "
|
|
|
|
" rport wwpn 0x%016llx: NQN \"%s\"\n",
|
|
|
|
ctrl->cnum, ctrl->lport->localport.port_name,
|
|
|
|
ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* Create the admin queue
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
|
2018-03-08 07:59:36 +08:00
|
|
|
NVME_AQ_DEPTH);
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_free_queue;
|
|
|
|
|
|
|
|
ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
|
2018-03-08 07:59:36 +08:00
|
|
|
NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_delete_hw_queue;
|
|
|
|
|
|
|
|
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
|
|
|
|
if (ret)
|
|
|
|
goto out_disconnect_admin_queue;
|
|
|
|
|
2017-10-24 20:25:21 +08:00
|
|
|
set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* Check controller capabilities
|
|
|
|
*
|
|
|
|
* todo:- add code to check if ctrl attributes changed from
|
|
|
|
* prior connection values
|
|
|
|
*/
|
|
|
|
|
2019-07-23 08:06:53 +08:00
|
|
|
ret = nvme_enable_ctrl(&ctrl->ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_disconnect_admin_queue;
|
|
|
|
|
2017-10-24 06:11:36 +08:00
|
|
|
ctrl->ctrl.max_hw_sectors =
|
|
|
|
(ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2019-08-03 10:33:59 +08:00
|
|
|
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = nvme_init_identify(&ctrl->ctrl);
|
|
|
|
if (ret)
|
|
|
|
goto out_disconnect_admin_queue;
|
|
|
|
|
|
|
|
/* sanity checks */
|
|
|
|
|
|
|
|
/* FC-NVME does not have other data in the capsule */
|
|
|
|
if (ctrl->ctrl.icdoff) {
|
|
|
|
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
|
|
|
|
ctrl->ctrl.icdoff);
|
|
|
|
goto out_disconnect_admin_queue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* FC-NVME supports normal SGL Data Block Descriptors */
|
|
|
|
|
|
|
|
if (opts->queue_size > ctrl->ctrl.maxcmd) {
|
|
|
|
/* warn if maxcmd is lower than queue_size */
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
|
|
|
"queue_size %zu > ctrl maxcmd %u, reducing "
|
|
|
|
"to queue_size\n",
|
|
|
|
opts->queue_size, ctrl->ctrl.maxcmd);
|
|
|
|
opts->queue_size = ctrl->ctrl.maxcmd;
|
|
|
|
}
|
|
|
|
|
2018-03-08 07:59:36 +08:00
|
|
|
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
|
|
|
|
/* warn if sqsize is lower than queue_size */
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
|
|
|
"queue_size %zu > ctrl sqsize %u, clamping down\n",
|
|
|
|
opts->queue_size, ctrl->ctrl.sqsize + 1);
|
|
|
|
opts->queue_size = ctrl->ctrl.sqsize + 1;
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = nvme_fc_init_aen_ops(ctrl);
|
|
|
|
if (ret)
|
|
|
|
goto out_term_aen_ops;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the io queues
|
|
|
|
*/
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
if (ctrl->ctrl.queue_count > 1) {
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used in
for blk-mq setup, etc. It also means that everything about the
controller is fully know before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transtion of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
if (!ctrl->ioq_live)
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = nvme_fc_create_io_queues(ctrl);
|
|
|
|
else
|
2018-06-14 05:07:38 +08:00
|
|
|
ret = nvme_fc_recreate_io_queues(ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_term_aen_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
|
|
|
|
|
2017-05-04 18:33:15 +08:00
|
|
|
ctrl->ctrl.nr_reconnects = 0;
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2017-10-26 07:43:14 +08:00
|
|
|
if (changed)
|
|
|
|
nvme_start_ctrl(&ctrl->ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
|
|
|
return 0; /* Success */
|
|
|
|
|
|
|
|
out_term_aen_ops:
|
|
|
|
nvme_fc_term_aen_ops(ctrl);
|
|
|
|
out_disconnect_admin_queue:
|
|
|
|
/* send a Disconnect(association) LS to fc-nvme target */
|
|
|
|
nvme_fc_xmt_disconnect_assoc(ctrl);
|
|
|
|
out_delete_hw_queue:
|
|
|
|
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
|
|
|
|
out_free_queue:
|
|
|
|
nvme_fc_free_queue(&ctrl->queues[0]);
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
a localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
ctrl->assoc_active = false;
|
|
|
|
nvme_fc_ctlr_inactive_on_rport(ctrl);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* This routine stops operation of the controller on the host side.
|
|
|
|
* On the host os stack side: Admin and IO queues are stopped,
|
|
|
|
* outstanding ios on them terminated via FC ABTS.
|
|
|
|
* On the link side: the association is terminated.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
if (!ctrl->assoc_active)
|
|
|
|
return;
|
|
|
|
ctrl->assoc_active = false;
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
spin_lock_irqsave(&ctrl->lock, flags);
|
|
|
|
ctrl->flags |= FCCTRL_TERMIO;
|
|
|
|
ctrl->iocnt = 0;
|
|
|
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If io queues are present, stop them and terminate all outstanding
|
|
|
|
* ios on them. As FC allocates FC exchange for each io, the
|
|
|
|
* transport must contact the LLDD to terminate the exchange,
|
|
|
|
* thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
|
|
|
|
* to tell us what io's are busy and invoke a transport routine
|
|
|
|
* to kill them with the LLDD. After terminating the exchange
|
|
|
|
* the LLDD will call the transport's normal io done path, but it
|
|
|
|
* will have an aborted status. The done path will return the
|
|
|
|
* io requests back to the block layer as part of normal completions
|
|
|
|
* (but with error status).
|
|
|
|
*/
|
2017-04-24 15:58:29 +08:00
|
|
|
if (ctrl->ctrl.queue_count > 1) {
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_stop_queues(&ctrl->ctrl);
|
|
|
|
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
|
|
|
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
2019-07-24 11:48:42 +08:00
|
|
|
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
2017-04-23 23:30:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Other transports, which don't have link-level contexts bound
|
|
|
|
* to sqe's, would try to gracefully shutdown the controller by
|
|
|
|
* writing the registers for shutdown and polling (call
|
|
|
|
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
|
|
|
|
* just aborted and we will wait on those contexts, and given
|
|
|
|
* there was no indication of how live the controller is on the
|
|
|
|
* link, don't send more io to create more contexts for the
|
|
|
|
* shutdown. Let the controller fail via keepalive failure if
|
|
|
|
* it's still present.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* clean up the admin queue. Same thing as above.
|
|
|
|
* use blk_mq_tagset_busy_iter() and the transport routine to
|
|
|
|
* terminate the exchanges.
|
|
|
|
*/
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
2017-04-23 23:30:08 +08:00
|
|
|
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
|
|
|
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
2019-07-24 11:48:42 +08:00
|
|
|
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
|
|
|
/* kill the aens as they are a separate path */
|
|
|
|
nvme_fc_abort_aen_ops(ctrl);
|
|
|
|
|
|
|
|
/* wait for all io that had to be aborted */
|
2017-10-10 04:39:44 +08:00
|
|
|
spin_lock_irq(&ctrl->lock);
|
2017-05-23 06:28:42 +08:00
|
|
|
wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->flags &= ~FCCTRL_TERMIO;
|
2017-10-10 04:39:44 +08:00
|
|
|
spin_unlock_irq(&ctrl->lock);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
|
|
|
nvme_fc_term_aen_ops(ctrl);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* send a Disconnect(association) LS to fc-nvme target
|
|
|
|
* Note: could have been sent at top of process, but
|
|
|
|
* cleaner on link traffic if after the aborts complete.
|
|
|
|
* Note: if association doesn't exist, association_id will be 0
|
|
|
|
*/
|
|
|
|
if (ctrl->association_id)
|
|
|
|
nvme_fc_xmt_disconnect_assoc(ctrl);
|
|
|
|
|
|
|
|
if (ctrl->ctrl.tagset) {
|
|
|
|
nvme_fc_delete_hw_io_queues(ctrl);
|
|
|
|
nvme_fc_free_io_queues(ctrl);
|
|
|
|
}
|
|
|
|
|
|
|
|
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
|
|
|
|
nvme_fc_free_queue(&ctrl->queues[0]);
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
|
nvme-fc: fix rogue admin cmds stalling teardown
When connectivity is lost to a device, the association is terminated
and the blk-mq queues are quiesced/stopped. When connectivity is
re-established, they are resumed.
If an admin command is received while connectivity is lost, the ioctl
queues the command on the admin_q and the command stalls (the thread
issuing the ioctl hangs/waits). If the connectivity is lost long
enough such that the controller is then deleted, the delete code
makes its calls to initiate the delete, which then expects the core
layer to call the transport when all references are removed and the
controller can be freed. Unfortunately, nothing in this path dequeued
the admin command, so a reference sits outstanding and things stop,
hanging the delete indefinitely.
Correct by unquiescing the admin queue in the delete association. This
means any admin command (which should only be from an ioctl) issued
after connectivity is lost will detect the controller is in a
reconnecting state and will (fast) fail the command. Thus, a pending
reference can no longer be created. Once connectivity is re-established,
a new ioctl/admin command would see proper device state and function again.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-01-12 06:29:22 +08:00
|
|
|
/* re-enable the admin_q so anything new can fast fail */
|
|
|
|
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
|
|
|
|
2018-06-20 22:44:12 +08:00
|
|
|
/* resume the io queues so that things will fast fail */
|
|
|
|
nvme_start_queues(&ctrl->ctrl);
|
|
|
|
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
nvme_fc_ctlr_inactive_on_rport(ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-10-29 16:44:29 +08:00
|
|
|
nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
|
2017-04-23 23:30:08 +08:00
|
|
|
{
|
2017-10-29 16:44:29 +08:00
|
|
|
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
2018-11-15 08:35:10 +08:00
|
|
|
cancel_work_sync(&ctrl->err_work);
|
2017-04-23 23:30:08 +08:00
|
|
|
cancel_delayed_work_sync(&ctrl->connect_work);
|
|
|
|
/*
|
|
|
|
* kill the association on the link side. this will block
|
|
|
|
* waiting for io to terminate
|
|
|
|
*/
|
|
|
|
nvme_fc_delete_association(ctrl);
|
|
|
|
}
|
|
|
|
|
2017-05-16 08:10:16 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
|
|
|
|
{
|
2017-10-26 07:43:17 +08:00
|
|
|
struct nvme_fc_rport *rport = ctrl->rport;
|
|
|
|
struct nvme_fc_remote_port *portptr = &rport->remoteport;
|
|
|
|
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
|
|
|
|
bool recon = true;
|
2017-05-16 08:10:16 +08:00
|
|
|
|
2018-02-01 00:31:24 +08:00
|
|
|
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
|
2017-05-16 08:10:16 +08:00
|
|
|
return;
|
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
2017-05-16 08:10:16 +08:00
|
|
|
dev_info(ctrl->ctrl.device,
|
2017-10-26 07:43:17 +08:00
|
|
|
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
|
|
|
|
ctrl->cnum, status);
|
|
|
|
else if (time_after_eq(jiffies, rport->dev_loss_end))
|
|
|
|
recon = false;
|
2017-05-16 08:10:16 +08:00
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
|
|
|
|
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: Reconnect attempt in %ld "
|
|
|
|
"seconds\n",
|
|
|
|
ctrl->cnum, recon_delay / HZ);
|
|
|
|
else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
|
|
|
|
recon_delay = rport->dev_loss_end - jiffies;
|
2017-10-26 07:43:16 +08:00
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
|
2017-05-16 08:10:16 +08:00
|
|
|
} else {
|
2017-10-26 07:43:17 +08:00
|
|
|
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
2017-05-16 08:10:16 +08:00
|
|
|
"NVME-FC{%d}: Max reconnect attempts (%d) "
|
2018-03-11 23:46:06 +08:00
|
|
|
"reached.\n",
|
2017-05-04 18:33:15 +08:00
|
|
|
ctrl->cnum, ctrl->ctrl.nr_reconnects);
|
2017-10-26 07:43:17 +08:00
|
|
|
else
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
|
2018-03-11 23:46:06 +08:00
|
|
|
"while waiting for remoteport connectivity.\n",
|
|
|
|
ctrl->cnum, portptr->dev_loss_tmo);
|
2017-10-29 16:44:29 +08:00
|
|
|
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
|
2017-05-16 08:10:16 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static void
|
2018-11-15 08:35:10 +08:00
|
|
|
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
|
2017-04-23 23:30:08 +08:00
|
|
|
{
|
2018-11-15 08:35:10 +08:00
|
|
|
nvme_stop_keep_alive(&ctrl->ctrl);
|
2017-10-26 07:43:14 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/* will block will waiting for io to terminate */
|
|
|
|
nvme_fc_delete_association(ctrl);
|
|
|
|
|
2018-11-15 08:35:10 +08:00
|
|
|
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
|
|
|
|
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
|
2017-10-26 07:43:14 +08:00
|
|
|
dev_err(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: error_recovery: Couldn't change state "
|
2018-02-01 00:31:24 +08:00
|
|
|
"to CONNECTING\n", ctrl->cnum);
|
2018-11-15 08:35:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_reset_ctrl_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl =
|
|
|
|
container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
__nvme_fc_terminate_io(ctrl);
|
|
|
|
|
|
|
|
nvme_stop_ctrl(&ctrl->ctrl);
|
2017-10-26 07:43:14 +08:00
|
|
|
|
2017-10-26 07:43:17 +08:00
|
|
|
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
|
2017-10-26 07:43:16 +08:00
|
|
|
ret = nvme_fc_create_association(ctrl);
|
2017-10-26 07:43:17 +08:00
|
|
|
else
|
|
|
|
ret = -ENOTCONN;
|
|
|
|
|
2017-05-16 08:10:16 +08:00
|
|
|
if (ret)
|
|
|
|
nvme_fc_reconnect_or_delete(ctrl, ret);
|
|
|
|
else
|
2017-04-23 23:30:08 +08:00
|
|
|
dev_info(ctrl->ctrl.device,
|
2017-10-26 07:43:17 +08:00
|
|
|
"NVME-FC{%d}: controller reset complete\n",
|
|
|
|
ctrl->cnum);
|
2017-04-23 23:30:08 +08:00
|
|
|
}
|
|
|
|
|
2018-11-15 08:35:10 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_connect_err_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl =
|
|
|
|
container_of(work, struct nvme_fc_ctrl, err_work);
|
|
|
|
|
|
|
|
__nvme_fc_terminate_io(ctrl);
|
|
|
|
|
|
|
|
atomic_set(&ctrl->err_work_active, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rescheduling the connection after recovering
|
|
|
|
* from the io error is left to the reconnect work
|
|
|
|
* item, which is what should have stalled waiting on
|
|
|
|
* the io that had the error that scheduled this work.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
|
|
|
|
.name = "fc",
|
|
|
|
.module = THIS_MODULE,
|
2017-05-20 21:14:44 +08:00
|
|
|
.flags = NVME_F_FABRICS,
|
2017-04-23 23:30:08 +08:00
|
|
|
.reg_read32 = nvmf_reg_read32,
|
|
|
|
.reg_read64 = nvmf_reg_read64,
|
|
|
|
.reg_write32 = nvmf_reg_write32,
|
|
|
|
.free_ctrl = nvme_fc_nvme_ctrl_freed,
|
|
|
|
.submit_async_event = nvme_fc_submit_async_event,
|
2017-10-29 16:44:29 +08:00
|
|
|
.delete_ctrl = nvme_fc_delete_ctrl,
|
2017-04-23 23:30:08 +08:00
|
|
|
.get_address = nvmf_get_address,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_connect_ctrl_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
struct nvme_fc_ctrl *ctrl =
|
|
|
|
container_of(to_delayed_work(work),
|
|
|
|
struct nvme_fc_ctrl, connect_work);
|
|
|
|
|
|
|
|
ret = nvme_fc_create_association(ctrl);
|
2017-05-16 08:10:16 +08:00
|
|
|
if (ret)
|
|
|
|
nvme_fc_reconnect_or_delete(ctrl, ret);
|
|
|
|
else
|
2017-04-23 23:30:08 +08:00
|
|
|
dev_info(ctrl->ctrl.device,
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used in
for blk-mq setup, etc. It also means that everything about the
controller is fully know before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transtion of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
"NVME-FC{%d}: controller connect complete\n",
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->cnum);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
|
|
|
|
.queue_rq = nvme_fc_queue_rq,
|
|
|
|
.complete = nvme_fc_complete_rq,
|
2017-06-13 15:15:20 +08:00
|
|
|
.init_request = nvme_fc_init_request,
|
2017-04-23 23:30:08 +08:00
|
|
|
.exit_request = nvme_fc_exit_request,
|
|
|
|
.init_hctx = nvme_fc_init_admin_hctx,
|
|
|
|
.timeout = nvme_fc_timeout,
|
|
|
|
};
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-10-21 07:17:08 +08:00
|
|
|
/*
|
|
|
|
* Fails a controller request if it matches an existing controller
|
|
|
|
* (association) with the same tuple:
|
|
|
|
* <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
|
|
|
|
*
|
|
|
|
* The ports don't need to be compared as they are intrinsically
|
|
|
|
* already matched by the port pointers supplied.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
nvme_fc_existing_controller(struct nvme_fc_rport *rport,
|
|
|
|
struct nvmf_ctrl_options *opts)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
unsigned long flags;
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
|
|
|
|
found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
|
|
|
|
if (found)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
static struct nvme_ctrl *
|
2017-04-23 23:30:08 +08:00
|
|
|
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
unsigned long flags;
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
int ret, idx;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-05-06 07:13:15 +08:00
|
|
|
if (!(rport->remoteport.port_role &
|
|
|
|
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
|
|
|
|
ret = -EBADR;
|
|
|
|
goto out_fail;
|
|
|
|
}
|
|
|
|
|
2017-10-21 07:17:08 +08:00
|
|
|
if (!opts->duplicate_connect &&
|
|
|
|
nvme_fc_existing_controller(rport, opts)) {
|
|
|
|
ret = -EALREADY;
|
|
|
|
goto out_fail;
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
|
|
|
|
if (!ctrl) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out_fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
|
|
|
|
if (idx < 0) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto out_free_ctrl;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctrl->ctrl.opts = opts;
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
ctrl->ctrl.nr_reconnects = 0;
|
2019-03-14 01:55:01 +08:00
|
|
|
if (lport->dev)
|
|
|
|
ctrl->ctrl.numa_node = dev_to_node(lport->dev);
|
|
|
|
else
|
|
|
|
ctrl->ctrl.numa_node = NUMA_NO_NODE;
|
2016-12-02 16:28:42 +08:00
|
|
|
INIT_LIST_HEAD(&ctrl->ctrl_list);
|
|
|
|
ctrl->lport = lport;
|
|
|
|
ctrl->rport = rport;
|
|
|
|
ctrl->dev = lport->dev;
|
|
|
|
ctrl->cnum = idx;
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
ctrl->ioq_live = false;
|
nvme-fc: decouple ns references from lldd references
In the lldd api, a lldd may unregister a remoteport (loss of connectivity
or driver unload) or localport (driver unload). The lldd must wait for the
remoteport_delete or localport_delete before completing its actions post
the unregister. The xxx_deletes currently occur only when the xxxport
structure is fully freed after all references are removed. Thus the lldd
may be held hostage until an app or in-kernel entity that has a namespace
open finally closes so the namespace can be removed, the controller
removed, thus the transport objects, thus the lldd.
This patch decouples the transport and os-facing objects from the lldd
and the remoteport and localport. There is a point in all deletions
where the transport will no longer interact with the lldd on behalf of
a controller. That point centers around the association established
with the target/subsystem. It will access the lldd whenever it attempts
to create an association and while the association is active. New
associations may only be created if the remoteport is live (thus the
localport is live). It will not access the lldd after deleting the
association.
Therefore, the patch tracks the count of active controllers - those with
associations being created or that are active - on a remoteport. It also
tracks the number of remoteports that have active controllers, on a
localport. When a remoteport is unregistered, as soon as there are no
active controllers, the lldd's remoteport_delete may be called and the
lldd may continue. Similarly, when a localport is unregistered, as soon
as there are no remoteports with active controllers, the localport_delete
callback may be made. This significantly speeds up unregistration with
the lldd.
The transport objects continue in suspended status with reconnect timers
running, and upon expiration, normal ref-counting will occur and the
objects will be freed. The transport object may still be held hostage
by the application/kernel module, but that is acceptable.
With this change, the lldd may be fully unloaded and reloaded, and
if registrations occur prior to the timeouts, the nvme controller and
namespaces will resume normally as if a link bounce.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2017-11-03 23:13:17 +08:00
|
|
|
ctrl->assoc_active = false;
|
2018-11-15 08:35:10 +08:00
|
|
|
atomic_set(&ctrl->err_work_active, 0);
|
2017-10-10 04:39:44 +08:00
|
|
|
init_waitqueue_head(&ctrl->ioabort_wait);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
get_device(ctrl->dev);
|
|
|
|
kref_init(&ctrl->ref);
|
|
|
|
|
2017-06-15 21:41:08 +08:00
|
|
|
INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
|
2017-04-23 23:30:08 +08:00
|
|
|
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
|
2018-11-15 08:35:10 +08:00
|
|
|
INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
|
2016-12-02 16:28:42 +08:00
|
|
|
spin_lock_init(&ctrl->lock);
|
|
|
|
|
|
|
|
/* io queue count */
|
2017-04-24 15:58:29 +08:00
|
|
|
ctrl->ctrl.queue_count = min_t(unsigned int,
|
2016-12-02 16:28:42 +08:00
|
|
|
opts->nr_io_queues,
|
|
|
|
lport->ops->max_hw_queues);
|
2017-04-24 15:58:29 +08:00
|
|
|
ctrl->ctrl.queue_count++; /* +1 for admin queue */
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
ctrl->ctrl.sqsize = opts->queue_size - 1;
|
|
|
|
ctrl->ctrl.kato = opts->kato;
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
ctrl->ctrl.cntlid = 0xffff;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
ret = -ENOMEM;
|
2017-04-24 15:58:29 +08:00
|
|
|
ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
|
|
|
|
sizeof(struct nvme_fc_queue), GFP_KERNEL);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (!ctrl->queues)
|
2017-04-23 23:30:08 +08:00
|
|
|
goto out_free_ida;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2018-06-14 05:07:38 +08:00
|
|
|
nvme_fc_init_queue(ctrl, 0);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
|
|
|
|
ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
|
2017-11-08 06:13:10 +08:00
|
|
|
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
|
2018-11-16 16:22:29 +08:00
|
|
|
ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
|
2018-10-09 05:28:45 +08:00
|
|
|
ctrl->admin_tag_set.cmd_size =
|
|
|
|
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
|
|
|
|
ctrl->lport->ops->fcprqst_priv_sz);
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->admin_tag_set.driver_data = ctrl;
|
|
|
|
ctrl->admin_tag_set.nr_hw_queues = 1;
|
|
|
|
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
|
2017-10-18 20:38:25 +08:00
|
|
|
ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret)
|
2017-04-23 23:30:08 +08:00
|
|
|
goto out_free_queues;
|
2017-07-10 14:22:29 +08:00
|
|
|
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2019-08-03 10:33:59 +08:00
|
|
|
ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
|
|
|
|
if (IS_ERR(ctrl->ctrl.fabrics_q)) {
|
|
|
|
ret = PTR_ERR(ctrl->ctrl.fabrics_q);
|
|
|
|
goto out_free_admin_tag_set;
|
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
|
|
|
|
if (IS_ERR(ctrl->ctrl.admin_q)) {
|
|
|
|
ret = PTR_ERR(ctrl->ctrl.admin_q);
|
2019-08-03 10:33:59 +08:00
|
|
|
goto out_cleanup_fabrics_q;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/*
|
|
|
|
* Would have been nice to init io queues tag set as well.
|
|
|
|
* However, we require interaction from the controller
|
|
|
|
* for max io queue count before we can do so.
|
|
|
|
* Defer this to the connect path.
|
|
|
|
*/
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
|
|
|
|
if (ret)
|
|
|
|
goto out_cleanup_admin_q;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
/* at this point, teardown path changes to ref counting on nvme ctrl */
|
2016-12-02 16:28:42 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&rport->lock, flags);
|
|
|
|
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
|
|
|
|
spin_unlock_irqrestore(&rport->lock, flags);
|
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
|
|
|
|
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
|
2017-10-10 07:39:22 +08:00
|
|
|
dev_err(ctrl->ctrl.device,
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
|
|
|
|
goto fail_ctrl;
|
|
|
|
}
|
2017-10-10 07:39:22 +08:00
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
nvme_get_ctrl(&ctrl->ctrl);
|
2017-04-23 23:30:08 +08:00
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
|
2017-06-16 14:40:54 +08:00
|
|
|
nvme_put_ctrl(&ctrl->ctrl);
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
dev_err(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: failed to schedule initial connect\n",
|
|
|
|
ctrl->cnum);
|
|
|
|
goto fail_ctrl;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
flush_delayed_work(&ctrl->connect_work);
|
2017-05-16 08:10:22 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
dev_info(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
|
|
|
|
ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
return &ctrl->ctrl;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
fail_ctrl:
|
|
|
|
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
|
|
|
|
cancel_work_sync(&ctrl->ctrl.reset_work);
|
2018-11-15 08:35:10 +08:00
|
|
|
cancel_work_sync(&ctrl->err_work);
|
nvme-fc: change controllers first connect to use reconnect path
Current code follows the framework that has been in the transports
from the beginning where initial link-side controller connect occurs
as part of "creating the controller". Thus that first connect fully
talks to the controller and obtains values that can then be used
for blk-mq setup, etc. It also means that everything about the
controller is fully known before the "create controller" call returns.
This has several weaknesses:
- The initial create_ctrl call made by the cli will block for a long
time as wire transactions are performed synchronously. This delay
becomes longer if errors occur or connectivity is lost and retries
need to be performed.
- Code wise, it means there is a separate connect path for initial
controller connect vs the (same) steps used in the reconnect path.
- And as there's separate paths, it means there's separate error
handling and retry logic. It also plays havoc with the NEW state
(should transition out of it after successful initial connect) vs
the RESETTING and CONNECTING (reconnect) states that want to be
transitioned to on error.
- As there's separate paths, to recover from errors and disruptions,
it requires separate recovery/retry paths as well and can severely
convolute the controller state.
This patch reworks the fc transport to use the same connect paths
for the initial connection as it uses for reconnect. This makes a
single path for error recovery and handling.
This patch:
- Removes the driving of the initial connect and replaces it with
a state transition to CONNECTING and initiating the reconnect
thread. A dummy state transition of RESETTING had to be traversed
as a direct transition of NEW->CONNECTING is not allowed. Given
that the controller is "new", the RESETTING transition is a simple
no-op. Once in the reconnecting thread, the normal behaviors of
ctrl_loss_tmo (max_retries * connect_delay) and dev_loss_tmo will
apply before the controller is torn down.
- Only if the state transitions couldn't be traversed and the
reconnect thread not scheduled, will the controller be torn down
while in create_ctrl.
- The prior code used the controller state of NEW to indicate
whether request queues had been initialized or not. For the admin
queue, the request queue is always created, so there's no need to
check a state. For IO queues, change to tracking whether a successful
io request queue create has occurred (e.g. 1st successful connect).
- The initial controller id is initialized to the dynamic controller
id used in the initial connect message. It will be overwritten by
the real controller id once the controller is connected on the wire.
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2018-06-14 05:07:37 +08:00
|
|
|
cancel_delayed_work_sync(&ctrl->connect_work);
|
|
|
|
|
|
|
|
ctrl->ctrl.opts = NULL;
|
|
|
|
|
|
|
|
/* initiate nvme ctrl ref counting teardown */
|
|
|
|
nvme_uninit_ctrl(&ctrl->ctrl);
|
|
|
|
|
|
|
|
/* Remove core ctrl ref. */
|
|
|
|
nvme_put_ctrl(&ctrl->ctrl);
|
|
|
|
|
|
|
|
/* as we're past the point where we transition to the ref
|
|
|
|
* counting teardown path, if we return a bad pointer here,
|
|
|
|
* the calling routine, thinking it's prior to the
|
|
|
|
* transition, will do an rport put. Since the teardown
|
|
|
|
* path also does a rport put, we do an extra get here
|
|
|
|
* so proper order/teardown happens.
|
|
|
|
*/
|
|
|
|
nvme_fc_rport_get(rport);
|
|
|
|
|
|
|
|
return ERR_PTR(-EIO);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
out_cleanup_admin_q:
|
|
|
|
blk_cleanup_queue(ctrl->ctrl.admin_q);
|
2019-08-03 10:33:59 +08:00
|
|
|
out_cleanup_fabrics_q:
|
|
|
|
blk_cleanup_queue(ctrl->ctrl.fabrics_q);
|
2017-04-23 23:30:08 +08:00
|
|
|
out_free_admin_tag_set:
|
|
|
|
blk_mq_free_tag_set(&ctrl->admin_tag_set);
|
|
|
|
out_free_queues:
|
|
|
|
kfree(ctrl->queues);
|
2016-12-02 16:28:42 +08:00
|
|
|
out_free_ida:
|
2017-04-23 23:30:08 +08:00
|
|
|
put_device(ctrl->dev);
|
2016-12-02 16:28:42 +08:00
|
|
|
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
|
|
|
|
out_free_ctrl:
|
|
|
|
kfree(ctrl);
|
|
|
|
out_fail:
|
|
|
|
/* exit via here doesn't follow ctlr ref points */
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
struct nvmet_fc_traddr {
|
|
|
|
u64 nn;
|
|
|
|
u64 pn;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
2017-07-18 04:59:39 +08:00
|
|
|
__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
|
2016-12-02 16:28:42 +08:00
|
|
|
{
|
|
|
|
u64 token64;
|
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
if (match_u64(sstr, &token64))
|
|
|
|
return -EINVAL;
|
|
|
|
*val = token64;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
/*
|
|
|
|
* This routine validates and extracts the WWN's from the TRADDR string.
|
|
|
|
* As kernel parsers need the 0x to determine number base, universally
|
|
|
|
* build string to parse with 0x prefix before parsing name strings.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
|
|
|
|
{
|
|
|
|
char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
|
|
|
|
substring_t wwn = { name, &name[sizeof(name)-1] };
|
|
|
|
int nnoffset, pnoffset;
|
|
|
|
|
2018-08-10 17:24:02 +08:00
|
|
|
/* validate if string is one of the 2 allowed formats */
|
2017-07-18 04:59:39 +08:00
|
|
|
if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
|
|
|
|
!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
|
|
|
|
!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
|
|
|
|
"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
|
|
|
|
nnoffset = NVME_FC_TRADDR_OXNNLEN;
|
|
|
|
pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
|
|
|
|
NVME_FC_TRADDR_OXNNLEN;
|
|
|
|
} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
|
|
|
|
!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
|
|
|
|
!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
|
|
|
|
"pn-", NVME_FC_TRADDR_NNLEN))) {
|
|
|
|
nnoffset = NVME_FC_TRADDR_NNLEN;
|
|
|
|
pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
|
|
|
|
} else
|
|
|
|
goto out_einval;
|
2016-12-02 16:28:42 +08:00
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
name[0] = '0';
|
|
|
|
name[1] = 'x';
|
|
|
|
name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;
|
|
|
|
|
|
|
|
memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
|
|
|
|
if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
|
|
|
|
goto out_einval;
|
|
|
|
|
|
|
|
memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
|
|
|
|
if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
|
|
|
|
goto out_einval;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_einval:
|
|
|
|
pr_warn("%s: bad traddr string\n", __func__);
|
|
|
|
return -EINVAL;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct nvme_ctrl *
|
|
|
|
nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport;
|
|
|
|
struct nvme_fc_rport *rport;
|
2017-04-23 23:30:08 +08:00
|
|
|
struct nvme_ctrl *ctrl;
|
2016-12-02 16:28:42 +08:00
|
|
|
struct nvmet_fc_traddr laddr = { 0L, 0L };
|
|
|
|
struct nvmet_fc_traddr raddr = { 0L, 0L };
|
|
|
|
unsigned long flags;
|
|
|
|
int ret;
|
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret || !raddr.nn || !raddr.pn)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2017-07-18 04:59:39 +08:00
|
|
|
ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
|
2016-12-02 16:28:42 +08:00
|
|
|
if (ret || !laddr.nn || !laddr.pn)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
/* find the host and remote ports to connect together */
|
|
|
|
spin_lock_irqsave(&nvme_fc_lock, flags);
|
|
|
|
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
|
|
|
|
if (lport->localport.node_name != laddr.nn ||
|
|
|
|
lport->localport.port_name != laddr.pn)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
list_for_each_entry(rport, &lport->endp_list, endp_list) {
|
|
|
|
if (rport->remoteport.node_name != raddr.nn ||
|
|
|
|
rport->remoteport.port_name != raddr.pn)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* if fail to get reference fall through. Will error */
|
|
|
|
if (!nvme_fc_rport_get(rport))
|
|
|
|
break;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
2017-04-23 23:30:08 +08:00
|
|
|
ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
|
|
|
|
if (IS_ERR(ctrl))
|
|
|
|
nvme_fc_rport_put(rport);
|
|
|
|
return ctrl;
|
2016-12-02 16:28:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
|
|
|
|
2018-04-20 01:43:42 +08:00
|
|
|
pr_warn("%s: %s - %s combination not found\n",
|
|
|
|
__func__, opts->traddr, opts->host_traddr);
|
2016-12-02 16:28:42 +08:00
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static struct nvmf_transport_ops nvme_fc_transport = {
|
|
|
|
.name = "fc",
|
2017-12-25 20:18:30 +08:00
|
|
|
.module = THIS_MODULE,
|
2016-12-02 16:28:42 +08:00
|
|
|
.required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
|
2017-05-16 08:10:16 +08:00
|
|
|
.allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
|
2016-12-02 16:28:42 +08:00
|
|
|
.create_ctrl = nvme_fc_create_ctrl,
|
|
|
|
};
|
|
|
|
|
2018-09-14 07:17:38 +08:00
|
|
|
/*
 * Arbitrary cap on how many failed reference grabs a single discovery
 * request tolerates before it stops rescanning. With lots of subsystems
 * the number of failures could be high.
 */
#define DISCOVERY_MAX_FAIL	20
|
|
|
|
|
|
|
|
/*
 * Store handler behind the "nvme_discovery" attribute.
 *
 * Walks every localport/remoteport pair under nvme_fc_lock, takes a
 * reference on both ports and queues the remoteport on a private list.
 * The list is then drained with the lock dropped around each entry:
 * a discovery scan is signalled for the pair and the references taken
 * here are released.
 *
 * @dev, @attr, @buf: not examined; any write triggers the scan.
 * @count: number of bytes written.
 *
 * Returns @count, i.e. the whole write is always reported as consumed.
 */
static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long flags;
	LIST_HEAD(local_disc_list);
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;
	int failcnt = 0;

	spin_lock_irqsave(&nvme_fc_lock, flags);
restart:
	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			/*
			 * Already on a discovery list (for instance queued
			 * before a restart): skip it before taking any
			 * references. Each queued entry is released exactly
			 * once in the drain loop below, so references taken
			 * for an rport that does not get queued again would
			 * never be dropped.
			 */
			if (!list_empty(&rport->disc_list))
				continue;

			if (!nvme_fc_lport_get(lport))
				continue;
			if (!nvme_fc_rport_get(rport)) {
				/*
				 * This is a temporary condition. Upon restart
				 * this rport will be gone from the list.
				 *
				 * Revert the lport get and retry. Anything
				 * added to the list already will be skipped
				 * (as they are no longer list_empty). Loops
				 * should resume at rports that were not yet
				 * seen.
				 */
				nvme_fc_lport_put(lport);

				if (failcnt++ < DISCOVERY_MAX_FAIL)
					goto restart;

				pr_err("nvme_discovery: too many reference "
				       "failures\n");
				goto process_local_list;
			}

			/* both references held: hand the rport to the drain loop */
			list_add_tail(&rport->disc_list, &local_disc_list);
		}
	}

process_local_list:
	while (!list_empty(&local_disc_list)) {
		rport = list_first_entry(&local_disc_list,
					 struct nvme_fc_rport, disc_list);
		/* re-initialise the node so a later scan sees it as unqueued */
		list_del_init(&rport->disc_list);
		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		lport = rport->lport;
		/* signal discovery. Won't hurt if it repeats */
		nvme_fc_signal_discovery_scan(lport, rport);
		nvme_fc_rport_put(rport);
		nvme_fc_lport_put(lport);

		spin_lock_irqsave(&nvme_fc_lock, flags);
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return count;
}
|
|
|
|
static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
|
|
|
|
|
|
|
|
static struct attribute *nvme_fc_attrs[] = {
|
|
|
|
&dev_attr_nvme_discovery.attr,
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct attribute_group nvme_fc_attr_group = {
|
|
|
|
.attrs = nvme_fc_attrs,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct attribute_group *nvme_fc_attr_groups[] = {
|
|
|
|
&nvme_fc_attr_group,
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct class fc_class = {
|
|
|
|
.name = "fc",
|
|
|
|
.dev_groups = nvme_fc_attr_groups,
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
};
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
 * Module init: create the workqueue, register the "fc" class, create the
 * device used for FC udev events, then register the fabrics transport.
 * A failure at any step unwinds the steps already completed, in reverse.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init nvme_fc_init_module(void)
{
	int ret;

	nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
	if (!nvme_fc_wq)
		return -ENOMEM;

	/*
	 * NOTE:
	 * The expectation is that the kernel will eventually fold the
	 * FC-isms that live under scsi today, and that NVME is now adding
	 * to, into a standalone FC class, with the SCSI and NVME protocols
	 * and their devices sitting underneath it.
	 *
	 * Something is needed right now to post FC-specific udev events to
	 * (nvme probe events in particular), so the new device class is
	 * created here. Once the standalone FC class exists, this code is
	 * meant to move to a more generic location for the class.
	 */
	ret = class_register(&fc_class);
	if (ret) {
		pr_err("couldn't register class fc\n");
		goto err_wq;
	}

	/* the device that FC-centric udev events are posted against */
	fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
				       "fc_udev_device");
	if (IS_ERR(fc_udev_device)) {
		ret = PTR_ERR(fc_udev_device);
		pr_err("couldn't create fc_udev device!\n");
		goto err_class;
	}

	ret = nvmf_register_transport(&nvme_fc_transport);
	if (ret)
		goto err_device;

	return 0;

err_device:
	device_destroy(&fc_class, MKDEV(0, 0));
err_class:
	class_unregister(&fc_class);
err_wq:
	destroy_workqueue(nvme_fc_wq);

	return ret;
}
|
|
|
|
|
2019-06-29 08:26:08 +08:00
|
|
|
static void
|
|
|
|
nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
|
|
|
|
{
|
|
|
|
struct nvme_fc_ctrl *ctrl;
|
|
|
|
|
|
|
|
spin_lock(&rport->lock);
|
|
|
|
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
|
|
|
|
dev_warn(ctrl->ctrl.device,
|
|
|
|
"NVME-FC{%d}: transport unloading: deleting ctrl\n",
|
|
|
|
ctrl->cnum);
|
|
|
|
nvme_delete_ctrl(&ctrl->ctrl);
|
|
|
|
}
|
|
|
|
spin_unlock(&rport->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nvme_fc_cleanup_for_unload(void)
|
|
|
|
{
|
|
|
|
struct nvme_fc_lport *lport;
|
|
|
|
struct nvme_fc_rport *rport;
|
|
|
|
|
|
|
|
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
|
|
|
|
list_for_each_entry(rport, &lport->endp_list, endp_list) {
|
|
|
|
nvme_fc_delete_controllers(rport);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:28:42 +08:00
|
|
|
/*
 * Module exit: flag that an unload is pending, request deletion of any
 * remaining controllers, wait until nvme_fc_unload_proceed is completed,
 * then undo what module init set up.
 */
static void __exit nvme_fc_exit_module(void)
{
	unsigned long flags;
	bool need_cleanup;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	nvme_fc_waiting_to_unload = true;
	/* only when localports are still registered is there work to wait on */
	need_cleanup = !list_empty(&nvme_fc_lport_list);
	if (need_cleanup)
		nvme_fc_cleanup_for_unload();
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (need_cleanup) {
		pr_info("%s: waiting for ctlr deletes\n", __func__);
		/* NOTE(review): completed elsewhere in the file - confirm where */
		wait_for_completion(&nvme_fc_unload_proceed);
		pr_info("%s: ctrl deletes complete\n", __func__);
	}

	nvmf_unregister_transport(&nvme_fc_transport);

	ida_destroy(&nvme_fc_local_port_cnt);
	ida_destroy(&nvme_fc_ctrl_cnt);

	device_destroy(&fc_class, MKDEV(0, 0));
	class_unregister(&fc_class);
	destroy_workqueue(nvme_fc_wq);
}
|
|
|
|
|
|
|
|
module_init(nvme_fc_init_module);
|
|
|
|
module_exit(nvme_fc_exit_module);
|
|
|
|
|
|
|
|
MODULE_LICENSE("GPL v2");
|