libceph: crush_location infrastructure
Allow expressing client's location in terms of CRUSH hierarchy as a set of (bucket type name, bucket name) pairs. The userspace syntax "crush_location = key1=value1 key2=value2" is incompatible with mount options and needed adaptation. Key-value pairs are separated by '|' and we use ':' instead of '=' to separate keys from values. So for: crush_location = host=foo rack=bar one would write: crush_location=host:foo|rack:bar As in userspace, "multipath" locations are supported, so indicating locality for parallel hierarchies is possible: crush_location=rack:foo1|rack:foo2|datacenter:bar Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Jeff Layton <jlayton@kernel.org>
This commit is contained in:
parent
86403a92c3
commit
45e6aa9f55
|
@ -64,6 +64,7 @@ struct ceph_options {
|
|||
int num_mon;
|
||||
char *name;
|
||||
struct ceph_crypto_key *key;
|
||||
struct rb_root crush_locs;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -302,9 +302,23 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
|
|||
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
|
||||
const struct ceph_pg *raw_pgid);
|
||||
|
||||
struct crush_loc {
|
||||
char *cl_type_name;
|
||||
char *cl_name;
|
||||
};
|
||||
|
||||
struct crush_loc_node {
|
||||
struct rb_node cl_node;
|
||||
struct crush_loc cl_loc; /* pointers into cl_data */
|
||||
char cl_data[];
|
||||
};
|
||||
|
||||
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs);
|
||||
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2);
|
||||
void ceph_clear_crush_locs(struct rb_root *locs);
|
||||
|
||||
extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
|
||||
u64 id);
|
||||
|
||||
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
|
||||
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
|
||||
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
|
||||
|
|
|
@ -176,6 +176,10 @@ int ceph_compare_options(struct ceph_options *new_opt,
|
|||
}
|
||||
}
|
||||
|
||||
ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* any matching mon ip implies a match */
|
||||
for (i = 0; i < opt1->num_mon; i++) {
|
||||
if (ceph_monmap_contains(client->monc.monmap,
|
||||
|
@ -260,6 +264,7 @@ enum {
|
|||
Opt_secret,
|
||||
Opt_key,
|
||||
Opt_ip,
|
||||
Opt_crush_location,
|
||||
/* string args above */
|
||||
Opt_share,
|
||||
Opt_crc,
|
||||
|
@ -274,6 +279,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
|
|||
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
|
||||
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
|
||||
fsparam_flag_no ("crc", Opt_crc),
|
||||
fsparam_string ("crush_location", Opt_crush_location),
|
||||
fsparam_string ("fsid", Opt_fsid),
|
||||
fsparam_string ("ip", Opt_ip),
|
||||
fsparam_string ("key", Opt_key),
|
||||
|
@ -298,6 +304,7 @@ struct ceph_options *ceph_alloc_options(void)
|
|||
if (!opt)
|
||||
return NULL;
|
||||
|
||||
opt->crush_locs = RB_ROOT;
|
||||
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
|
||||
GFP_KERNEL);
|
||||
if (!opt->mon_addr) {
|
||||
|
@ -320,6 +327,7 @@ void ceph_destroy_options(struct ceph_options *opt)
|
|||
if (!opt)
|
||||
return;
|
||||
|
||||
ceph_clear_crush_locs(&opt->crush_locs);
|
||||
kfree(opt->name);
|
||||
if (opt->key) {
|
||||
ceph_crypto_key_destroy(opt->key);
|
||||
|
@ -454,6 +462,16 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
|
|||
if (!opt->key)
|
||||
return -ENOMEM;
|
||||
return get_secret(opt->key, param->string, &log);
|
||||
case Opt_crush_location:
|
||||
ceph_clear_crush_locs(&opt->crush_locs);
|
||||
err = ceph_parse_crush_location(param->string,
|
||||
&opt->crush_locs);
|
||||
if (err) {
|
||||
error_plog(&log, "Failed to parse CRUSH location: %d",
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
break;
|
||||
|
||||
case Opt_osdtimeout:
|
||||
warn_plog(&log, "Ignoring osdtimeout");
|
||||
|
@ -536,6 +554,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
|
|||
{
|
||||
struct ceph_options *opt = client->options;
|
||||
size_t pos = m->count;
|
||||
struct rb_node *n;
|
||||
|
||||
if (opt->name) {
|
||||
seq_puts(m, "name=");
|
||||
|
@ -545,6 +564,23 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
|
|||
if (opt->key)
|
||||
seq_puts(m, "secret=<hidden>,");
|
||||
|
||||
if (!RB_EMPTY_ROOT(&opt->crush_locs)) {
|
||||
seq_puts(m, "crush_location=");
|
||||
for (n = rb_first(&opt->crush_locs); ; ) {
|
||||
struct crush_loc_node *loc =
|
||||
rb_entry(n, struct crush_loc_node, cl_node);
|
||||
|
||||
seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name,
|
||||
loc->cl_loc.cl_name);
|
||||
n = rb_next(n);
|
||||
if (!n)
|
||||
break;
|
||||
|
||||
seq_putc(m, '|');
|
||||
}
|
||||
seq_putc(m, ',');
|
||||
}
|
||||
|
||||
if (opt->flags & CEPH_OPT_FSID)
|
||||
seq_printf(m, "fsid=%pU,", &opt->fsid);
|
||||
if (opt->flags & CEPH_OPT_NOSHARE)
|
||||
|
|
|
@ -2715,3 +2715,119 @@ int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
|
|||
return acting.primary;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pg_to_acting_primary);
|
||||
|
||||
static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
|
||||
size_t name_len)
|
||||
{
|
||||
struct crush_loc_node *loc;
|
||||
|
||||
loc = kmalloc(sizeof(*loc) + type_name_len + name_len + 2, GFP_NOIO);
|
||||
if (!loc)
|
||||
return NULL;
|
||||
|
||||
RB_CLEAR_NODE(&loc->cl_node);
|
||||
return loc;
|
||||
}
|
||||
|
||||
static void free_crush_loc(struct crush_loc_node *loc)
|
||||
{
|
||||
WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
|
||||
|
||||
kfree(loc);
|
||||
}
|
||||
|
||||
static int crush_loc_compare(const struct crush_loc *loc1,
|
||||
const struct crush_loc *loc2)
|
||||
{
|
||||
return strcmp(loc1->cl_type_name, loc2->cl_type_name) ?:
|
||||
strcmp(loc1->cl_name, loc2->cl_name);
|
||||
}
|
||||
|
||||
DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
|
||||
RB_BYPTR, const struct crush_loc *, cl_node)
|
||||
|
||||
/*
|
||||
* Parses a set of <bucket type name>':'<bucket name> pairs separated
|
||||
* by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
|
||||
*
|
||||
* Note that @crush_location is modified by strsep().
|
||||
*/
|
||||
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
|
||||
{
|
||||
struct crush_loc_node *loc;
|
||||
const char *type_name, *name, *colon;
|
||||
size_t type_name_len, name_len;
|
||||
|
||||
dout("%s '%s'\n", __func__, crush_location);
|
||||
while ((type_name = strsep(&crush_location, "|"))) {
|
||||
colon = strchr(type_name, ':');
|
||||
if (!colon)
|
||||
return -EINVAL;
|
||||
|
||||
type_name_len = colon - type_name;
|
||||
if (type_name_len == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name = colon + 1;
|
||||
name_len = strlen(name);
|
||||
if (name_len == 0)
|
||||
return -EINVAL;
|
||||
|
||||
loc = alloc_crush_loc(type_name_len, name_len);
|
||||
if (!loc)
|
||||
return -ENOMEM;
|
||||
|
||||
loc->cl_loc.cl_type_name = loc->cl_data;
|
||||
memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
|
||||
loc->cl_loc.cl_type_name[type_name_len] = '\0';
|
||||
|
||||
loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
|
||||
memcpy(loc->cl_loc.cl_name, name, name_len);
|
||||
loc->cl_loc.cl_name[name_len] = '\0';
|
||||
|
||||
if (!__insert_crush_loc(locs, loc)) {
|
||||
free_crush_loc(loc);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
dout("%s type_name '%s' name '%s'\n", __func__,
|
||||
loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
|
||||
{
|
||||
struct rb_node *n1 = rb_first(locs1);
|
||||
struct rb_node *n2 = rb_first(locs2);
|
||||
int ret;
|
||||
|
||||
for ( ; n1 && n2; n1 = rb_next(n1), n2 = rb_next(n2)) {
|
||||
struct crush_loc_node *loc1 =
|
||||
rb_entry(n1, struct crush_loc_node, cl_node);
|
||||
struct crush_loc_node *loc2 =
|
||||
rb_entry(n2, struct crush_loc_node, cl_node);
|
||||
|
||||
ret = crush_loc_compare(&loc1->cl_loc, &loc2->cl_loc);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!n1 && n2)
|
||||
return -1;
|
||||
if (n1 && !n2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ceph_clear_crush_locs(struct rb_root *locs)
|
||||
{
|
||||
while (!RB_EMPTY_ROOT(locs)) {
|
||||
struct crush_loc_node *loc =
|
||||
rb_entry(rb_first(locs), struct crush_loc_node, cl_node);
|
||||
|
||||
erase_crush_loc(locs, loc);
|
||||
free_crush_loc(loc);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue