Merge branch 'nvme-4.19' of git://git.infradead.org/nvme into for-4.19/block2

Pull NVMe changes from Christoph:

"This contains the support for TP4004, Asymmetric Namespace Access,
 which makes NVMe multipathing usable in practice."

* 'nvme-4.19' of git://git.infradead.org/nvme:
  nvmet: use Retain Async Event bit to clear AEN
  nvmet: support configuring ANA groups
  nvmet: add minimal ANA support
  nvmet: track and limit the number of namespaces per subsystem
  nvmet: keep a port pointer in nvmet_ctrl
  nvme: add ANA support
  nvme: remove nvme_req_needs_failover
  nvme: simplify the API for getting log pages
  nvme.h: add ANA definitions
  nvme.h: add support for the log specific field

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2018-08-05 19:34:09 -06:00
commit f87b0f0dfa
9 changed files with 880 additions and 74 deletions

View File

@ -252,7 +252,8 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
if (nvme_req_needs_failover(req, status)) {
if ((req->cmd_flags & REQ_NVME_MPATH) &&
blk_path_error(status)) {
nvme_failover_req(req);
return;
}
@ -1067,7 +1068,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
@ -2281,21 +2282,16 @@ out_unlock:
return ret;
}
int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 log_page, void *log,
size_t size, u64 offset)
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
unsigned long dwlen = size / 4 - 1;
c.get_log_page.opcode = nvme_admin_get_log_page;
if (ns)
c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
else
c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
c.get_log_page.nsid = cpu_to_le32(nsid);
c.get_log_page.lid = log_page;
c.get_log_page.lsp = lsp;
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
@ -2304,12 +2300,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
size_t size)
{
return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
}
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
{
int ret;
@ -2320,8 +2310,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
if (!ctrl->effects)
return 0;
ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
sizeof(*ctrl->effects));
ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
ctrl->effects, sizeof(*ctrl->effects), 0);
if (ret) {
kfree(ctrl->effects);
ctrl->effects = NULL;
@ -2412,6 +2402,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);
if (id->rtd3e) {
/* us -> s */
@ -2471,8 +2462,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
ret = nvme_mpath_init(ctrl, id);
kfree(id);
if (ret < 0)
return ret;
if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
@ -2691,6 +2686,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
#endif
NULL,
};
@ -2713,6 +2712,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
#endif
return a->mode;
}
@ -3086,8 +3093,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_get_ctrl(ctrl);
kfree(id);
device_add_disk(ctrl->device, ns->disk);
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group))
@ -3097,8 +3102,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
nvme_mpath_add_disk(ns->head);
nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns);
kfree(id);
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@ -3240,7 +3247,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
log_size, 0);
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
@ -3357,9 +3365,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
dev_warn(ctrl->device,
"Get FW SLOT INFO log error\n");
/*
 * nvme_get_log() takes (ctrl, nsid, log_page, lsp, log, size, offset): the
 * log page identifier comes before the log specific field, which is 0 here.
 */
if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
		sizeof(*log), 0))
	dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
}
@ -3405,6 +3413,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@ -3437,6 +3452,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
@ -3474,6 +3490,7 @@ static void nvme_free_ctrl(struct device *dev)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
if (subsys) {
mutex_lock(&subsys->lock);

View File

@ -604,8 +604,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
while (left) {
len = min_t(unsigned int, left, max_len);
ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
dev_meta, len, offset);
ret = nvme_get_log(ctrl, ns->head->ns_id,
NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
offset);
if (ret) {
dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
break;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017 Christoph Hellwig.
* Copyright (c) 2017-2018 Christoph Hellwig.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@ -20,6 +20,11 @@ module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return multipath && (ctrl->subsys->cmic & (1 << 3));
}
/*
* If multipathing is enabled we need to always use the subsystem instance
* number for numbering our devices to avoid conflicts between subsystems that
@ -45,6 +50,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status;
unsigned long flags;
spin_lock_irqsave(&ns->head->requeue_lock, flags);
@ -52,15 +58,35 @@ void nvme_failover_req(struct request *req)
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
blk_mq_end_request(req, 0);
nvme_reset_ctrl(ns->ctrl);
kblockd_schedule_work(&ns->head->requeue_work);
}
switch (status & 0x7ff) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
case NVME_SC_ANA_PERSISTENT_LOSS:
/*
* If we got back an ANA error we know the controller is alive,
* but not ready to serve this namespace. The spec suggests
* we should update our general state here, but due to the fact
* that the admin and I/O queues are not serialized that is
* fundamentally racy. So instead just clear the current path,
* mark the path as pending and kick off a re-read of the ANA
* log page ASAP.
*/
nvme_mpath_clear_current_path(ns);
if (ns->ctrl->ana_log_buf) {
set_bit(NVME_NS_ANA_PENDING, &ns->flags);
queue_work(nvme_wq, &ns->ctrl->ana_work);
}
break;
default:
/*
* Reset the controller for any non-ANA error as we don't know
* what caused the error.
*/
nvme_reset_ctrl(ns->ctrl);
break;
}
bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
if (!(req->cmd_flags & REQ_NVME_MPATH))
return false;
return blk_path_error(error);
kblockd_schedule_work(&ns->head->requeue_work);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@ -75,25 +101,51 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
up_read(&ctrl->namespaces_rwsem);
}
/*
 * Human-readable names of the ANA states, indexed by the numeric state
 * value.  The table is never modified, so both the strings and the pointer
 * array itself are const.
 */
static const char * const nvme_ana_state_names[] = {
	[0]				= "invalid state",
	[NVME_ANA_OPTIMIZED]		= "optimized",
	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
	[NVME_ANA_CHANGE]		= "change",
};
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns;
struct nvme_ns *ns, *fallback = NULL;
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (ns->ctrl->state == NVME_CTRL_LIVE) {
if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
continue;
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
rcu_assign_pointer(head->current_path, ns);
return ns;
case NVME_ANA_NONOPTIMIZED:
fallback = ns;
break;
default:
break;
}
}
return NULL;
if (fallback)
rcu_assign_pointer(head->current_path, fallback);
return fallback;
}
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return ns->ctrl->state == NVME_CTRL_LIVE &&
ns->ana_state == NVME_ANA_OPTIMIZED;
}
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
ns = __nvme_find_path(head);
return ns;
}
@ -142,7 +194,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
srcu_idx = srcu_read_lock(&head->srcu);
ns = srcu_dereference(head->current_path, &head->srcu);
if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
if (likely(ns && nvme_path_is_optimized(ns)))
found = ns->queue->poll_fn(q, qc);
srcu_read_unlock(&head->srcu, srcu_idx);
return found;
@ -176,6 +228,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
struct request_queue *q;
bool vwc = false;
mutex_init(&head->lock);
bio_list_init(&head->requeue_list);
spin_lock_init(&head->requeue_lock);
INIT_WORK(&head->requeue_work, nvme_requeue_work);
@ -220,29 +273,232 @@ out:
return -ENOMEM;
}
void nvme_mpath_add_disk(struct nvme_ns_head *head)
static void nvme_mpath_set_live(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
lockdep_assert_held(&ns->head->lock);
if (!head->disk)
return;
mutex_lock(&head->subsys->lock);
if (!(head->disk->flags & GENHD_FL_UP)) {
device_add_disk(&head->subsys->dev, head->disk);
if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
&nvme_ns_id_attr_group))
pr_warn("%s: failed to create sysfs group for identification\n",
head->disk->disk_name);
dev_warn(&head->subsys->dev,
"failed to create id group.\n");
}
kblockd_schedule_work(&ns->head->requeue_work);
}
/*
 * Walk the ANA group descriptors stored in ctrl->ana_log_buf and invoke @cb
 * on each of them.
 *
 * @ctrl: controller whose ANA log buffer is parsed; ctrl->ana_lock must be
 *        held by the caller.
 * @data: opaque pointer handed through to @cb.
 * @cb:   called once per descriptor; a non-zero return value stops the walk
 *        and is returned to the caller.
 *
 * Returns 0 when all descriptors were visited, -EINVAL when a descriptor is
 * malformed or does not fit into the log buffer, or the non-zero value
 * returned by @cb.
 */
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
			void *))
{
	void *base = ctrl->ana_log_buf;
	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
	int error, i;

	lockdep_assert_held(&ctrl->ana_lock);

	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
		struct nvme_ana_group_desc *desc = base + offset;
		size_t nsid_buf_size;
		u32 nr_nsids;

		/*
		 * Make sure the fixed part of the descriptor lies inside the
		 * buffer before any of its fields is read.  Doing this at the
		 * top of the loop (instead of after the previous descriptor)
		 * accepts a log whose last descriptor ends exactly at the end
		 * of the buffer.
		 */
		if (WARN_ON_ONCE(offset > ctrl->ana_log_size ||
				 ctrl->ana_log_size - offset < sizeof(*desc)))
			return -EINVAL;

		nr_nsids = le32_to_cpu(desc->nnsids);

		if (WARN_ON_ONCE(desc->grpid == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
			return -EINVAL;

		offset += sizeof(*desc);

		/*
		 * Compare against the remaining space rather than subtracting
		 * the nsid array size from the log size, so that a bogus
		 * nnsids value cannot wrap the arithmetic.
		 */
		if (WARN_ON_ONCE(nr_nsids >
				 (ctrl->ana_log_size - offset) / sizeof(__le32)))
			return -EINVAL;
		nsid_buf_size = nr_nsids * sizeof(__le32);

		error = cb(ctrl, desc, data);
		if (error)
			return error;

		offset += nsid_buf_size;
	}

	return 0;
}
static inline bool nvme_state_is_live(enum nvme_ana_state state)
{
return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
}
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns)
{
enum nvme_ana_state old;
mutex_lock(&ns->head->lock);
old = ns->ana_state;
ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
/*
 * nvme_parse_ana_log() callback: log the state of one ANA group and apply it
 * to the namespaces of @ctrl that are listed in @desc.
 *
 * @data points at an unsigned counter that is incremented when the group is
 * in the NVME_ANA_CHANGE state.  Always returns 0.
 */
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
	unsigned *nr_change_groups = data;
	struct nvme_ns *ns;

	/* The group id is an unsigned 32-bit value, so print it with %u. */
	dev_info(ctrl->device, "ANA group %u: %s.\n",
			le32_to_cpu(desc->grpid),
			nvme_ana_state_names[desc->state]);

	if (desc->state == NVME_ANA_CHANGE)
		(*nr_change_groups)++;

	if (!nr_nsids)
		return 0;

	/*
	 * NOTE(review): the walk only advances to the next nsid of the
	 * descriptor after the current one was matched, so it appears to rely
	 * on ctrl->namespaces and desc->nsids[] being in the same order and on
	 * every listed nsid being present - confirm against the scan code.
	 */
	down_write(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->ns_id != le32_to_cpu(desc->nsids[n]))
			continue;
		nvme_update_ns_ana_state(desc, ns);
		if (++n == nr_nsids)
			break;
	}
	up_write(&ctrl->namespaces_rwsem);
	WARN_ON_ONCE(n < nr_nsids);
	return 0;
}
/*
 * Fetch the ANA log page into ctrl->ana_log_buf and apply it through
 * nvme_update_ana_state(), all while holding ctrl->ana_lock.
 *
 * @groups_only: when true the log is requested with NVME_ANA_LOG_RGO in the
 * log specific field, otherwise with 0.
 *
 * The ANATT timer is (re)armed when at least one group is in the change
 * state and deleted otherwise.  Returns 0 on success, or the non-zero value
 * returned by nvme_get_log() or nvme_parse_ana_log().
 */
static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
{
u32 nr_change_groups = 0;
int error;
mutex_lock(&ctrl->ana_lock);
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
groups_only ? NVME_ANA_LOG_RGO : 0,
ctrl->ana_log_buf, ctrl->ana_log_size, 0);
if (error) {
dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
goto out_unlock;
}
/* the callback counts the groups that are in the change state */
error = nvme_parse_ana_log(ctrl, &nr_change_groups,
nvme_update_ana_state);
if (error)
goto out_unlock;
/*
* In theory we should have an ANATT timer per group as they might enter
* the change state at different times. But that is a lot of overhead
* just to protect against a target that keeps entering new changes
* states while never finishing previous ones. But we'll still
* eventually time out once all groups are in change state, so this
* isn't a big deal.
*
* We also double the ANATT value to provide some slack for transports
* or AEN processing overhead.
*/
if (nr_change_groups)
mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
else
del_timer_sync(&ctrl->anatt_timer);
out_unlock:
mutex_unlock(&ctrl->ana_lock);
return error;
}
/* Work handler: re-read the full ANA log of the owning controller. */
static void nvme_ana_work(struct work_struct *work)
{
	nvme_read_ana_log(container_of(work, struct nvme_ctrl, ana_work),
			false);
}
static void nvme_anatt_timeout(struct timer_list *t)
{
struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
nvme_reset_ctrl(ctrl);
}
/*
 * Stop the ANA machinery of @ctrl: delete the ANATT timer and cancel the
 * ANA work item.  Does nothing when ANA is not in use for the controller.
 */
void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
	if (nvme_ctrl_use_ana(ctrl)) {
		del_timer_sync(&ctrl->anatt_timer);
		cancel_work_sync(&ctrl->ana_work);
	}
}
/*
 * sysfs show handler for the per-path "ana_grpid" attribute.  ana_grpid is a
 * u32, so it is formatted with %u rather than %d.
 */
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, "%u\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
DEVICE_ATTR_RO(ana_grpid);
static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
DEVICE_ATTR_RO(ana_state);
/*
 * nvme_parse_ana_log() callback used for a single namespace (@data): when
 * @desc describes the namespace's ANA group, apply it and return -ENXIO so
 * the walk stops; otherwise return 0 to continue with the next descriptor.
 */
static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	struct nvme_ns *ns = data;

	if (le32_to_cpu(desc->grpid) != ns->ana_grpid)
		return 0;

	nvme_update_ns_ana_state(desc, ns);
	return -ENXIO; /* just break out of the loop */
}
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {
mutex_lock(&ns->ctrl->ana_lock);
ns->ana_grpid = le32_to_cpu(id->anagrpid);
nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
mutex_unlock(&ns->ctrl->ana_lock);
} else {
mutex_lock(&ns->head->lock);
ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
mutex_unlock(&head->subsys->lock);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
&nvme_ns_id_attr_group);
del_gendisk(head->disk);
if (head->disk->flags & GENHD_FL_UP) {
sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
&nvme_ns_id_attr_group);
del_gendisk(head->disk);
}
blk_set_queue_dying(head->disk->queue);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
@ -250,3 +506,52 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
blk_cleanup_queue(head->disk->queue);
put_disk(head->disk);
}
/*
 * Set up ANA handling for @ctrl from the Identify Controller data in @id:
 * cache the ANA fields, initialise the lock, timer and work item, allocate
 * the ANA log buffer and read the log once (group descriptors only).
 *
 * Returns 0 when ANA is not in use, when the log page would not fit into one
 * transfer (ANA is then left without a log buffer), or on success; a negative
 * errno otherwise.  On failure ctrl->ana_log_buf is freed and reset to NULL
 * so that nvme_mpath_uninit() does not free it a second time.
 */
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
	int error;

	if (!nvme_ctrl_use_ana(ctrl))
		return 0;

	ctrl->anacap = id->anacap;
	ctrl->anatt = id->anatt;
	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);

	mutex_init(&ctrl->ana_lock);
	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);

	/* header + one descriptor per group (+ nsid list unless bit 6 set) */
	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
	if (!(ctrl->anacap & (1 << 6)))
		ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);

	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
		dev_err(ctrl->device,
			"ANA log page size (%zu) larger than MDTS (%d).\n",
			ctrl->ana_log_size,
			ctrl->max_hw_sectors << SECTOR_SHIFT);
		dev_err(ctrl->device, "disabling ANA support.\n");
		return 0;
	}

	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
	if (!ctrl->ana_log_buf)
		return -ENOMEM;

	error = nvme_read_ana_log(ctrl, true);
	if (error)
		goto out_free_ana_log_buf;
	return 0;

out_free_ana_log_buf:
	kfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = NULL;
	/*
	 * Report the real failure instead of -ENOMEM.  A positive value is
	 * mapped to -EIO because the caller only treats negative values as
	 * failure.
	 */
	return error < 0 ? error : -EIO;
}
/* Release the ANA log buffer of @ctrl. */
void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
kfree(ctrl->ana_log_buf);
}

View File

@ -183,6 +183,7 @@ struct nvme_ctrl {
u16 oacs;
u16 nssa;
u16 nr_streams;
u32 max_namespaces;
atomic_t abort_limit;
u8 vwc;
u32 vs;
@ -205,6 +206,19 @@ struct nvme_ctrl {
struct work_struct fw_act_work;
unsigned long events;
#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
u8 anacap;
u8 anatt;
u32 anagrpmax;
u32 nanagrpid;
struct mutex ana_lock;
struct nvme_ana_rsp_hdr *ana_log_buf;
size_t ana_log_size;
struct timer_list anatt_timer;
struct work_struct ana_work;
#endif
/* Power saving configuration */
u64 ps_max_latency_us;
bool apst_enabled;
@ -269,6 +283,7 @@ struct nvme_ns_head {
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
#endif
struct list_head list;
struct srcu_struct srcu;
@ -295,6 +310,10 @@ struct nvme_ns {
struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_ana_state ana_state;
u32 ana_grpid;
#endif
struct list_head siblings;
struct nvm_dev *ndev;
struct kref kref;
@ -307,8 +326,9 @@ struct nvme_ns {
bool ext;
u8 pi_type;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
u16 noiob;
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@ -436,21 +456,24 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 log_page, void *log, size_t size, u64 offset);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset);
extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
@ -469,7 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
kblockd_schedule_work(&head->requeue_work);
}
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
#else
/* Stub for builds without CONFIG_NVME_MULTIPATH: ANA is never used. */
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return false;
}
/*
* Without the multipath code enabled, multiple controller per subsystems are
* visible as devices and thus we cannot use the subsystem instance.
@ -483,11 +513,6 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
static inline void nvme_failover_req(struct request *req)
{
}
static inline bool nvme_req_needs_failover(struct request *req,
blk_status_t error)
{
return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
@ -496,7 +521,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
{
return 0;
}
static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
struct nvme_id_ns *id)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@ -508,6 +534,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
}
/* Stub for builds without CONFIG_NVME_MULTIPATH: nothing to set up. */
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
return 0;
}
/* Stub for builds without CONFIG_NVME_MULTIPATH: nothing to release. */
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}
/* Stub for builds without CONFIG_NVME_MULTIPATH: nothing to stop. */
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM

View File

@ -19,6 +19,19 @@
#include <asm/unaligned.h>
#include "nvmet.h"
/*
 * Clear @aen_bit in the controller's aen_masked bitmap unless bit 15 of
 * command dword 10 (the Retain Asynchronous Event bit) is set.  Use this
 * helper when processing log pages that are associated with an AEN.
 */
static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
{
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);

	if (cdw10 & (1 << 15))
		return;

	clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
}
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@ -176,12 +189,76 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
if (!status)
status = nvmet_zero_sgl(req, len, req->data_len - len);
ctrl->nr_changed_ns = 0;
clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
}
/*
 * Fill @desc with the ANA group descriptor for @grpid as seen through the
 * port of @req.  The namespace id list is only filled in when the request
 * did not set NVME_ANA_LOG_RGO in the log specific field.
 *
 * Returns the number of bytes of @desc that are valid (fixed part plus the
 * nsid entries that were written).
 */
static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
		struct nvme_ana_group_desc *desc)
{
	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
	struct nvmet_ns *ns;
	u32 nr_nsids = 0;

	if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
		rcu_read_lock();
		list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
			if (ns->anagrpid != grpid)
				continue;
			desc->nsids[nr_nsids] = cpu_to_le32(ns->nsid);
			nr_nsids++;
		}
		rcu_read_unlock();
	}

	desc->grpid = cpu_to_le32(grpid);
	desc->nnsids = cpu_to_le32(nr_nsids);
	desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
	desc->state = req->port->ana_state[grpid];
	memset(desc->rsvd17, 0, sizeof(desc->rsvd17));

	return sizeof(*desc) + nr_nsids * sizeof(__le32);
}
/*
 * Execute a Get Log Page command for the ANA log: emit one descriptor per
 * enabled ANA group after the header, then write the header with the final
 * group count and complete the request.
 */
static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
{
struct nvme_ana_rsp_hdr hdr = { 0, };
struct nvme_ana_group_desc *desc;
size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
size_t len;
u32 grpid;
u16 ngrps = 0;
u16 status;
status = NVME_SC_INTERNAL;
/* scratch descriptor sized for the largest possible nsid list */
desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
if (!desc)
goto out;
down_read(&nvmet_ana_sem);
for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
if (!nvmet_ana_group_enabled[grpid])
continue;
len = nvmet_format_ana_group(req, grpid, desc);
status = nvmet_copy_to_sgl(req, offset, desc, len);
if (status)
break;
offset += len;
ngrps++;
}
hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
hdr.ngrps = cpu_to_le16(ngrps);
nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
up_read(&nvmet_ana_sem);
kfree(desc);
/* copy the header last once we know the number of groups */
/* NOTE(review): this overwrites a failure status from the loop above */
status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
out:
nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@ -213,8 +290,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
* the safest is to leave it as zeroes.
*/
/* we support multiple ports and multiples hosts: */
id->cmic = (1 << 0) | (1 << 1);
/* we support multiple ports, multiples hosts and ANA: */
id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
/* no limit on data transfer sizes for now */
id->mdts = 0;
@ -252,6 +329,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
NVME_CTRL_ONCS_WRITE_ZEROES);
@ -281,6 +359,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->msdbd = ctrl->ops->msdbd;
id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
id->anatt = 10; /* random value */
id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);
/*
* Meh, we don't really support any power state. Fake up the same
* values that qemu does.
@ -322,8 +405,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
*/
id->ncap = id->nuse = id->nsze =
cpu_to_le64(ns->size >> ns->blksize_shift);
id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
switch (req->port->ana_state[ns->anagrpid]) {
case NVME_ANA_INACCESSIBLE:
case NVME_ANA_PERSISTENT_LOSS:
break;
default:
id->nuse = id->nsze;
break;
}
/*
* We just provide a single LBA format that matches what the
@ -337,6 +427,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* controllers, but also with any other user of the block device.
*/
id->nmic = (1 << 0);
id->anagrpid = cpu_to_le32(ns->anagrpid);
memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
@ -619,6 +710,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_LOG_CMD_EFFECTS:
req->execute = nvmet_execute_get_log_cmd_effects_ns;
return 0;
case NVME_LOG_ANA:
req->execute = nvmet_execute_get_log_page_ana;
return 0;
}
break;
case nvme_admin_identify:

View File

@ -411,6 +411,39 @@ out_unlock:
CONFIGFS_ATTR(nvmet_ns_, device_nguid);
static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page)
{
return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid);
}
/*
 * configfs store handler: move the namespace into the ANA group parsed from
 * @page.  Valid ids are 1..NVMET_MAX_ANAGRPS.  The per-group use counters
 * and the global change count are updated under nvmet_ana_sem, then an ANA
 * event is sent for the subsystem.
 *
 * Returns @count on success, the kstrtou32() error for unparsable input, or
 * -EINVAL for an out-of-range id.
 */
static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	u32 prev, next;
	int err;

	err = kstrtou32(page, 0, &next);
	if (err)
		return err;

	if (next == 0 || next > NVMET_MAX_ANAGRPS)
		return -EINVAL;

	down_write(&nvmet_ana_sem);
	prev = ns->anagrpid;
	nvmet_ana_group_enabled[next]++;
	ns->anagrpid = next;
	nvmet_ana_group_enabled[prev]--;
	nvmet_ana_chgcnt++;
	up_write(&nvmet_ana_sem);

	nvmet_send_ana_event(ns->subsys, NULL);
	return count;
}
CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
{
return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
@ -468,6 +501,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
&nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
NULL,
@ -916,6 +950,134 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
/*
 * Mapping between ANA states and the names used for them in configfs.  The
 * table is only ever read, so it is declared const.
 */
static const struct {
	enum nvme_ana_state	state;
	const char		*name;
} nvmet_ana_state_names[] = {
	{ NVME_ANA_OPTIMIZED,		"optimized" },
	{ NVME_ANA_NONOPTIMIZED,	"non-optimized" },
	{ NVME_ANA_INACCESSIBLE,	"inaccessible" },
	{ NVME_ANA_PERSISTENT_LOSS,	"persistent-loss" },
	{ NVME_ANA_CHANGE,		"change" },
};
static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
char *page)
{
struct nvmet_ana_group *grp = to_ana_group(item);
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
if (state != nvmet_ana_state_names[i].state)
continue;
return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
}
return sprintf(page, "\n");
}
/*
 * configfs store handler: set the group's ANA state on its port from the
 * state name in @page, bump the global change count and send an ANA event
 * for the port.
 *
 * Returns @count on success or -EINVAL when the name is not recognised.
 */
static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ana_group *grp = to_ana_group(item);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(nvmet_ana_state_names); idx++) {
		if (sysfs_streq(page, nvmet_ana_state_names[idx].name))
			break;
	}

	if (idx == ARRAY_SIZE(nvmet_ana_state_names)) {
		pr_err("Invalid value '%s' for ana_state\n", page);
		return -EINVAL;
	}

	down_write(&nvmet_ana_sem);
	grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[idx].state;
	nvmet_ana_chgcnt++;
	up_write(&nvmet_ana_sem);

	nvmet_port_send_ana_event(grp->port);
	return count;
}
CONFIGFS_ATTR(nvmet_ana_group_, ana_state);
/* NULL-terminated attribute list of an ANA group item: just "ana_state". */
static struct configfs_attribute *nvmet_ana_group_attrs[] = {
&nvmet_ana_group_attr_ana_state,
NULL,
};
static void nvmet_ana_group_release(struct config_item *item)
{
struct nvmet_ana_group *grp = to_ana_group(item);
if (grp == &grp->port->ana_default_group)
return;
down_write(&nvmet_ana_sem);
grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE;
nvmet_ana_group_enabled[grp->grpid]--;
up_write(&nvmet_ana_sem);
nvmet_port_send_ana_event(grp->port);
kfree(grp);
}
/* Item operations of an ANA group: only a release handler is provided. */
static struct configfs_item_operations nvmet_ana_group_item_ops = {
.release = nvmet_ana_group_release,
};
/* configfs item type of a single ANA group directory. */
static const struct config_item_type nvmet_ana_group_type = {
.ct_item_ops = &nvmet_ana_group_item_ops,
.ct_attrs = nvmet_ana_group_attrs,
.ct_owner = THIS_MODULE,
};
/*
 * configfs make_group handler for a port's "ana_groups" directory.  @name
 * must parse as a group id in the range 2..NVMET_MAX_ANAGRPS.  On success
 * the new group is counted as enabled, an ANA event is sent for the port
 * and the initialised config_group is returned.
 *
 * Returns an ERR_PTR() carrying the kstrtou32() error, -EINVAL for an
 * out-of-range id, or -ENOMEM when the allocation fails.
 */
static struct config_group *nvmet_ana_groups_make_group(
		struct config_group *group, const char *name)
{
	struct nvmet_port *port = ana_groups_to_port(&group->cg_item);
	struct nvmet_ana_group *grp;
	u32 grpid;
	int ret;

	ret = kstrtou32(name, 0, &grpid);
	if (ret)
		return ERR_PTR(ret);

	if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS)
		return ERR_PTR(-EINVAL);

	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
	if (!grp)
		return ERR_PTR(-ENOMEM);

	grp->port = port;
	grp->grpid = grpid;

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[grpid]++;
	up_write(&nvmet_ana_sem);

	nvmet_port_send_ana_event(grp->port);

	config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type);
	return &grp->group;
}
/* Group operations of "ana_groups": creating a child makes an ANA group. */
static struct configfs_group_operations nvmet_ana_groups_group_ops = {
.make_group = nvmet_ana_groups_make_group,
};
/* configfs item type of a port's "ana_groups" directory. */
static const struct config_item_type nvmet_ana_groups_type = {
.ct_group_ops = &nvmet_ana_groups_group_ops,
.ct_owner = THIS_MODULE,
};
/*
* Ports definitions.
*/
@ -923,6 +1085,7 @@ static void nvmet_port_release(struct config_item *item)
{
struct nvmet_port *port = to_nvmet_port(item);
kfree(port->ana_state);
kfree(port);
}
@ -951,6 +1114,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
{
struct nvmet_port *port;
u16 portid;
u32 i;
if (kstrtou16(name, 0, &portid))
return ERR_PTR(-EINVAL);
@ -959,6 +1123,20 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
if (!port)
return ERR_PTR(-ENOMEM);
port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1,
sizeof(*port->ana_state), GFP_KERNEL);
if (!port->ana_state) {
kfree(port);
return ERR_PTR(-ENOMEM);
}
for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
if (i == NVMET_DEFAULT_ANA_GRPID)
port->ana_state[1] = NVME_ANA_OPTIMIZED;
else
port->ana_state[i] = NVME_ANA_INACCESSIBLE;
}
INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
@ -975,6 +1153,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
"referrals", &nvmet_referrals_type);
configfs_add_default_group(&port->referrals_group, &port->group);
config_group_init_type_name(&port->ana_groups_group,
"ana_groups", &nvmet_ana_groups_type);
configfs_add_default_group(&port->ana_groups_group, &port->group);
port->ana_default_group.port = port;
port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID;
config_group_init_type_name(&port->ana_default_group.group,
__stringify(NVMET_DEFAULT_ANA_GRPID),
&nvmet_ana_group_type);
configfs_add_default_group(&port->ana_default_group.group,
&port->ana_groups_group);
return &port->group;
}

View File

@ -40,6 +40,10 @@ static DEFINE_IDA(cntlid_ida);
*/
DECLARE_RWSEM(nvmet_config_sem);
/*
 * Global ANA bookkeeping.
 *
 * nvmet_ana_group_enabled[] is indexed directly by ANA group ID (hence the
 * "+ 1"; presumably slot 0 stays unused since 0 is not a valid group ID) and
 * is incremented/decremented as groups and namespaces come and go.
 * nvmet_ana_chgcnt is bumped when a group's state is changed.
 * nvmet_ana_sem is taken for writing around updates to these.
 */
u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn);
@ -190,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
}
}
/*
 * Queue an ANA notice (log page NVME_LOG_ANA) on controllers of @subsys.
 *
 * @subsys: subsystem whose controller list is walked with subsys->lock held
 * @port:   when non-NULL, only controllers attached to this port are
 *          considered; NULL means every controller of the subsystem
 *
 * Controllers for which nvmet_aen_disabled() reports the ANA change
 * notification as disabled are skipped.
 */
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		bool port_matches = !port || ctrl->port == port;

		/*
		 * The port filter is evaluated first so that
		 * nvmet_aen_disabled() is only consulted for controllers
		 * that are actually candidates for the event.
		 */
		if (port_matches &&
		    !nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
			nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
					NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}
/*
 * Send an ANA event for @port to every subsystem linked to it.
 *
 * The port's subsystem list is walked with nvmet_config_sem held for
 * reading; each subsystem is passed to nvmet_send_ana_event() with @port as
 * the filter.
 */
void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *link;

	down_read(&nvmet_config_sem);
	list_for_each_entry(link, &port->subsystems, entry)
		nvmet_send_ana_event(link->subsys, port);
	up_read(&nvmet_config_sem);
}
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
@ -337,9 +368,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
int ret = 0;
int ret;
mutex_lock(&subsys->lock);
ret = -EMFILE;
if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
goto out_unlock;
ret = 0;
if (ns->enabled)
goto out_unlock;
@ -374,6 +409,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_add_tail_rcu(&ns->dev_link, &old->dev_link);
}
subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
@ -414,6 +450,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
nvmet_ns_changed(subsys, ns->nsid);
nvmet_ns_dev_disable(ns);
out_unlock:
@ -424,6 +461,10 @@ void nvmet_ns_free(struct nvmet_ns *ns)
{
nvmet_ns_disable(ns);
down_write(&nvmet_ana_sem);
nvmet_ana_group_enabled[ns->anagrpid]--;
up_write(&nvmet_ana_sem);
kfree(ns->device_path);
kfree(ns);
}
@ -441,6 +482,12 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
down_write(&nvmet_ana_sem);
ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
nvmet_ana_group_enabled[ns->anagrpid]++;
up_write(&nvmet_ana_sem);
uuid_gen(&ns->uuid);
ns->buffered_io = false;
@ -548,6 +595,20 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
/*
 * Map the ANA state of @ns's group on @port to an NVMe status code.
 *
 * Returns 0 when the state does not block I/O, otherwise the matching
 * path-related status: inaccessible, persistent loss, or transition (for
 * the change state).
 */
static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	switch (port->ana_state[ns->anagrpid]) {
	case NVME_ANA_INACCESSIBLE:
		return NVME_SC_ANA_INACCESSIBLE;
	case NVME_ANA_PERSISTENT_LOSS:
		return NVME_SC_ANA_PERSISTENT_LOSS;
	case NVME_ANA_CHANGE:
		return NVME_SC_ANA_TRANSITION;
	default:
		/* every other state lets the command proceed */
		return 0;
	}
}
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
@ -560,6 +621,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
if (unlikely(!req->ns))
return NVME_SC_INVALID_NS | NVME_SC_DNR;
ret = nvmet_check_ana_state(req->port, req->ns);
if (unlikely(ret))
return ret;
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
@ -876,6 +940,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
nvmet_init_cap(ctrl);
ctrl->port = req->port;
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
@ -1115,12 +1181,15 @@ static int __init nvmet_init(void)
{
int error;
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0);
if (!buffered_io_wq) {
error = -ENOMEM;
goto out;
}
error = nvmet_init_discovery();
if (error)
goto out;

View File

@ -30,12 +30,11 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
/*
* Supported optional AENs:
*/
#define NVMET_AEN_CFG_OPTIONAL \
NVME_AEN_CFG_NS_ATTR
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
/*
* Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@ -64,6 +63,7 @@ struct nvmet_ns {
loff_t size;
u8 nguid[16];
uuid_t uuid;
u32 anagrpid;
bool buffered_io;
bool enabled;
@ -98,6 +98,18 @@ struct nvmet_sq {
struct completion confirm_done;
};
/*
 * One ANA group as exposed through configfs under a port's "ana_groups"
 * directory.
 */
struct nvmet_ana_group {
struct config_group group; /* configfs group; see to_ana_group() */
struct nvmet_port *port; /* port this group belongs to */
u32 grpid; /* ANA group ID, used as index into port->ana_state */
};
/* Recover the nvmet_ana_group that embeds the config group behind @item. */
static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
{
	struct config_group *cg = to_config_group(item);

	return container_of(cg, struct nvmet_ana_group, group);
}
/**
* struct nvmet_port - Common structure to keep port
* information for the target.
@ -115,6 +127,9 @@ struct nvmet_port {
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
enum nvme_ana_state *ana_state;
void *priv;
bool enabled;
int inline_data_size;
@ -126,6 +141,13 @@ static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
group);
}
/*
 * Recover the nvmet_port whose ana_groups_group member is the config group
 * behind @item.
 */
static inline struct nvmet_port *ana_groups_to_port(
		struct config_item *item)
{
	struct config_group *cg = to_config_group(item);

	return container_of(cg, struct nvmet_port, ana_groups_group);
}
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_cq **cqs;
@ -140,6 +162,8 @@ struct nvmet_ctrl {
u16 cntlid;
u32 kato;
struct nvmet_port *port;
u32 aen_enabled;
unsigned long aen_masked;
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
@ -168,6 +192,7 @@ struct nvmet_subsys {
struct kref ref;
struct list_head namespaces;
unsigned int nr_namespaces;
unsigned int max_nsid;
struct list_head ctrls;
@ -340,6 +365,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);
/*
 * ANA change notification helpers: per subsystem (optionally filtered by
 * port), and for all subsystems linked to a port.
 */
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
struct nvmet_port *port);
void nvmet_port_send_ana_event(struct nvmet_port *port);
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
@ -360,6 +389,22 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
/*
* Nice round number that makes a list of nsids fit into a page.
* Should become tunable at some point in the future.
*/
#define NVMET_MAX_NAMESPACES 1024
/*
* 0 is not a valid ANA group ID, so we start numbering at 1.
*
* ANA Group 1 exists without manual intervention, has namespaces assigned to it
* by default, and is available in an optimized state through all ports.
*/
#define NVMET_MAX_ANAGRPS 128
#define NVMET_DEFAULT_ANA_GRPID 1
#define NVMET_KAS 10
#define NVMET_DISC_KATO 120
@ -373,6 +418,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys;
extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
extern u64 nvmet_ana_chgcnt;
extern struct rw_semaphore nvmet_ana_sem;
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);

View File

@ -242,7 +242,12 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
__u8 rsvd338[174];
__u8 rsvd338[4];
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
__le32 nanagrpid;
__u8 rsvd352[160];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
@ -258,7 +263,8 @@ struct nvme_id_ctrl {
__le16 acwu;
__u8 rsvd534[2];
__le32 sgls;
__u8 rsvd540[228];
__le32 mnan;
__u8 rsvd544[224];
char subnqn[256];
__u8 rsvd1024[768];
__le32 ioccsz;
@ -312,7 +318,9 @@ struct nvme_id_ns {
__le16 nabspf;
__le16 noiob;
__u8 nvmcap[16];
__u8 rsvd64[40];
__u8 rsvd64[28];
__le32 anagrpid;
__u8 rsvd96[8];
__u8 nguid[16];
__u8 eui64[8];
struct nvme_lbaf lbaf[16];
@ -425,6 +433,32 @@ struct nvme_effects_log {
__u8 resv[2048];
};
/*
 * Asymmetric Namespace Access states of an ANA group.
 * NOTE(review): values presumably match the encoding of the state byte in
 * struct nvme_ana_group_desc - confirm against the NVMe specification.
 */
enum nvme_ana_state {
NVME_ANA_OPTIMIZED = 0x01,
NVME_ANA_NONOPTIMIZED = 0x02,
NVME_ANA_INACCESSIBLE = 0x03,
NVME_ANA_PERSISTENT_LOSS = 0x04,
NVME_ANA_CHANGE = 0x0f,
};
/*
 * ANA Group Descriptor as found in the ANA log page.
 *
 * The fixed part of the descriptor is 32 bytes: bytes 17..31 are reserved
 * and the namespace ID list starts at byte offset 32.  The reserved field
 * must therefore span 15 bytes; a shorter pad would shift nsids[] and
 * misparse every namespace ID in the log.
 */
struct nvme_ana_group_desc {
	__le32	grpid;		/* ANA group ID */
	__le32	nnsids;		/* number of entries in nsids[] */
	__le64	chgcnt;		/* change count */
	__u8	state;		/* ANA state of the group */
	__u8	rsvd17[15];	/* reserved, bytes 17..31 */
	__le32	nsids[];	/* namespace IDs belonging to the group */
};
/* flag for the log specific field of the ANA log */
#define NVME_ANA_LOG_RGO (1 << 0)
/*
 * Header at the start of the ANA log page (16 bytes).
 * NOTE(review): ngrps is presumably the number of ANA group descriptors
 * that follow the header - confirm against the NVMe specification.
 */
struct nvme_ana_rsp_hdr {
__le64 chgcnt;
__le16 ngrps;
__le16 rsvd10[3];
};
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@ -444,11 +478,13 @@ enum {
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
NVME_AER_NOTICE_ANA = 0x03,
};
enum {
NVME_AEN_CFG_NS_ATTR = 1 << 8,
NVME_AEN_CFG_FW_ACT = 1 << 9,
NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
};
struct nvme_lba_range_type {
@ -763,6 +799,7 @@ enum {
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_CHANGED_NS = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
NVME_LOG_ANA = 0x0c,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@ -885,7 +922,7 @@ struct nvme_get_log_page_command {
__u64 rsvd2[2];
union nvme_data_ptr dptr;
__u8 lid;
__u8 rsvd10;
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
@ -1185,6 +1222,13 @@ enum {
NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_UNWRITTEN_BLOCK = 0x287,
/*
* Path-related Errors:
*/
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_DNR = 0x4000,
};