nvme-multipath: round-robin I/O policy
Implement a simple round-robin I/O policy for multipathing. Path selection is done in two rounds: first iterate across all optimized paths and, if that doesn't return any valid paths, iterate over all optimized and non-optimized paths. If no paths are found, use the existing algorithm. Also add a sysfs attribute 'iopolicy' to switch between the current NUMA-aware I/O policy and the 'round-robin' I/O policy. Signed-off-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
49b1f22b56
commit
75c10e7327
|
@ -2328,6 +2328,9 @@ static struct attribute *nvme_subsys_attrs[] = {
|
||||||
&subsys_attr_serial.attr,
|
&subsys_attr_serial.attr,
|
||||||
&subsys_attr_firmware_rev.attr,
|
&subsys_attr_firmware_rev.attr,
|
||||||
&subsys_attr_subsysnqn.attr,
|
&subsys_attr_subsysnqn.attr,
|
||||||
|
#ifdef CONFIG_NVME_MULTIPATH
|
||||||
|
&subsys_attr_iopolicy.attr,
|
||||||
|
#endif
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2380,6 +2383,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
||||||
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
|
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
|
||||||
subsys->vendor_id = le16_to_cpu(id->vid);
|
subsys->vendor_id = le16_to_cpu(id->vid);
|
||||||
subsys->cmic = id->cmic;
|
subsys->cmic = id->cmic;
|
||||||
|
#ifdef CONFIG_NVME_MULTIPATH
|
||||||
|
subsys->iopolicy = NVME_IOPOLICY_NUMA;
|
||||||
|
#endif
|
||||||
|
|
||||||
subsys->dev.class = nvme_subsys_class;
|
subsys->dev.class = nvme_subsys_class;
|
||||||
subsys->dev.release = nvme_release_subsystem;
|
subsys->dev.release = nvme_release_subsystem;
|
||||||
|
|
|
@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
|
||||||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
distance = node_distance(node, ns->ctrl->numa_node);
|
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
|
||||||
|
distance = node_distance(node, ns->ctrl->numa_node);
|
||||||
|
else
|
||||||
|
distance = LOCAL_DISTANCE;
|
||||||
|
|
||||||
switch (ns->ana_state) {
|
switch (ns->ana_state) {
|
||||||
case NVME_ANA_OPTIMIZED:
|
case NVME_ANA_OPTIMIZED:
|
||||||
|
@ -168,6 +171,47 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
|
||||||
|
struct nvme_ns *ns)
|
||||||
|
{
|
||||||
|
ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns,
|
||||||
|
siblings);
|
||||||
|
if (ns)
|
||||||
|
return ns;
|
||||||
|
return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
|
||||||
|
int node, struct nvme_ns *old)
|
||||||
|
{
|
||||||
|
struct nvme_ns *ns, *found, *fallback = NULL;
|
||||||
|
|
||||||
|
if (list_is_singular(&head->list))
|
||||||
|
return old;
|
||||||
|
|
||||||
|
for (ns = nvme_next_ns(head, old);
|
||||||
|
ns != old;
|
||||||
|
ns = nvme_next_ns(head, ns)) {
|
||||||
|
if (ns->ctrl->state != NVME_CTRL_LIVE ||
|
||||||
|
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
|
||||||
|
found = ns;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
|
||||||
|
fallback = ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!fallback)
|
||||||
|
return NULL;
|
||||||
|
found = fallback;
|
||||||
|
out:
|
||||||
|
rcu_assign_pointer(head->current_path[node], found);
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
|
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
|
||||||
{
|
{
|
||||||
return ns->ctrl->state == NVME_CTRL_LIVE &&
|
return ns->ctrl->state == NVME_CTRL_LIVE &&
|
||||||
|
@ -180,6 +224,8 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
|
||||||
struct nvme_ns *ns;
|
struct nvme_ns *ns;
|
||||||
|
|
||||||
ns = srcu_dereference(head->current_path[node], &head->srcu);
|
ns = srcu_dereference(head->current_path[node], &head->srcu);
|
||||||
|
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR && ns)
|
||||||
|
ns = nvme_round_robin_path(head, node, ns);
|
||||||
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
|
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
|
||||||
ns = __nvme_find_path(head, node);
|
ns = __nvme_find_path(head, node);
|
||||||
return ns;
|
return ns;
|
||||||
|
@ -471,6 +517,44 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
|
||||||
cancel_work_sync(&ctrl->ana_work);
|
cancel_work_sync(&ctrl->ana_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Define a struct device_attribute called subsys_attr_<name> with the given
 * mode and show/store callbacks.
 */
#define SUBSYS_ATTR_RW(name, mode, show, store)			\
	struct device_attribute subsys_attr_##name =		\
		__ATTR(name, mode, show, store)
|
||||||
|
|
||||||
|
static const char *nvme_iopolicy_names[] = {
|
||||||
|
[NVME_IOPOLICY_NUMA] = "numa",
|
||||||
|
[NVME_IOPOLICY_RR] = "round-robin",
|
||||||
|
};
|
||||||
|
|
||||||
|
static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
struct nvme_subsystem *subsys =
|
||||||
|
container_of(dev, struct nvme_subsystem, dev);
|
||||||
|
|
||||||
|
return sprintf(buf, "%s\n",
|
||||||
|
nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * sysfs store callback for the subsystem 'iopolicy' attribute.
 *
 * Compares @buf against every entry of nvme_iopolicy_names[] with
 * sysfs_streq() and, on the first match, stores the matching index as the
 * subsystem's I/O policy.
 *
 * Returns @count on success or -EINVAL when @buf names no known policy.
 */
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);
	int i;

	for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
		if (!sysfs_streq(buf, nvme_iopolicy_names[i]))
			continue;

		WRITE_ONCE(subsys->iopolicy, i);
		return count;
	}

	return -EINVAL;
}
|
||||||
|
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
|
||||||
|
nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
|
||||||
|
|
||||||
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
|
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
|
|
|
@ -252,6 +252,11 @@ struct nvme_ctrl {
|
||||||
unsigned long discard_page_busy;
|
unsigned long discard_page_busy;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Multipath I/O policy of a subsystem.  The values are used as indices into
 * nvme_iopolicy_names[], so they must stay dense and start at zero.
 */
enum nvme_iopolicy {
	/* Path choice weighted by node_distance(); set at subsystem init. */
	NVME_IOPOLICY_NUMA	= 0,
	/* Path choice rotates through the paths via nvme_round_robin_path(). */
	NVME_IOPOLICY_RR	= 1,
};
|
||||||
|
|
||||||
struct nvme_subsystem {
|
struct nvme_subsystem {
|
||||||
int instance;
|
int instance;
|
||||||
struct device dev;
|
struct device dev;
|
||||||
|
@ -271,6 +276,9 @@ struct nvme_subsystem {
|
||||||
u8 cmic;
|
u8 cmic;
|
||||||
u16 vendor_id;
|
u16 vendor_id;
|
||||||
struct ida ns_ida;
|
struct ida ns_ida;
|
||||||
|
#ifdef CONFIG_NVME_MULTIPATH
|
||||||
|
enum nvme_iopolicy iopolicy;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -491,6 +499,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
|
||||||
|
|
||||||
extern struct device_attribute dev_attr_ana_grpid;
|
extern struct device_attribute dev_attr_ana_grpid;
|
||||||
extern struct device_attribute dev_attr_ana_state;
|
extern struct device_attribute dev_attr_ana_state;
|
||||||
|
extern struct device_attribute subsys_attr_iopolicy;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
|
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
|
||||||
|
|
Loading…
Reference in New Issue