diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index f0f03d5470b5..4bd0bee22a0d 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options. ignored on non-init namespace mounts. Please refer to the Delegation section for details. + [no]favordynmods + Reduce the latencies of dynamic cgroup modifications such as + task migrations and controller on/offs at the cost of making + hot path operations such as forks and exits more expensive. + The static usage pattern of creating a cgroup, enabling + controllers, and then seeding it with CLONE_INTO_CGROUP is + not affected by this option. + memory_[no]localevents Only populate memory.events with data for the current cgroup, and not any subtrees. This is legacy behaviour, the default diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 672de25e3ec8..63bf43c7ca3b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -88,20 +88,33 @@ enum { */ CGRP_ROOT_NS_DELEGATE = (1 << 3), + /* + * Reduce latencies on dynamic cgroup modifications such as task + * migrations and controller on/offs by disabling percpu operation on + * cgroup_threadgroup_rwsem. This makes hot path operations such as + * forks and exits into the slow path and more expensive. + * + * The static usage pattern of creating a cgroup, enabling controllers, + * and then seeding it with CLONE_INTO_CGROUP doesn't require write + * locking cgroup_threadgroup_rwsem and thus doesn't benefit from + * favordynmod. + */ + CGRP_ROOT_FAVOR_DYNMODS = (1 << 4), + /* * Enable cpuset controller in v1 cgroup to use v2 behavior. */ - CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), + CGRP_ROOT_CPUSET_V2_MODE = (1 << 16), /* * Enable legacy local memory.events. */ - CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5), + CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17), /* * Enable recursive subtree protection */ - CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6), + CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), }; /* cftype->flags */ diff --git a/init/Kconfig b/init/Kconfig index c984afc489de..c93b10b3de3f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -936,6 +936,16 @@ if CGROUPS config PAGE_COUNTER bool +config CGROUP_FAVOR_DYNMODS + bool "Favor dynamic modification latency reduction by default" + help + This option enables the "favordynmods" mount option by default + which reduces the latencies of dynamic cgroup modifications such + as task migrations and controller on/offs at the cost of making + hot path operations such as forks and exits more expensive. + + Say N if unsure. + config MEMCG bool "Memory controller" select PAGE_COUNTER diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 5da09c74228d..36b740cb3d59 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -233,6 +233,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn); int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); +void cgroup_favor_dynmods(struct cgroup_root *root, bool favor); void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index afc6c0e9c966..2ade21b54dc4 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -875,6 +875,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo seq_puts(seq, ",xattr"); if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) seq_puts(seq, ",cpuset_v2_mode"); + if (root->flags & CGRP_ROOT_FAVOR_DYNMODS) + seq_puts(seq, ",favordynmods"); spin_lock(&release_agent_path_lock); if (strlen(root->release_agent_path)) @@ -898,6 +900,8 @@ enum cgroup1_param { Opt_noprefix, Opt_release_agent, Opt_xattr, + Opt_favordynmods, + Opt_nofavordynmods, }; const struct fs_parameter_spec cgroup1_fs_parameters[] = { @@ -909,6 +913,8 @@ const struct fs_parameter_spec cgroup1_fs_parameters[] = { fsparam_flag ("noprefix", Opt_noprefix), fsparam_string("release_agent", Opt_release_agent), fsparam_flag ("xattr", Opt_xattr), + fsparam_flag ("favordynmods", Opt_favordynmods), + fsparam_flag ("nofavordynmods", Opt_nofavordynmods), {} }; @@ -960,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_xattr: ctx->flags |= CGRP_ROOT_XATTR; break; + case Opt_favordynmods: + ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; + break; + case Opt_nofavordynmods: + ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; + break; case Opt_release_agent: /* Specifying two release agents is forbidden */ if (ctx->release_agent) @@ -1211,8 +1223,11 @@ static int cgroup1_root_to_use(struct fs_context *fc) init_cgroup_root(ctx); ret = cgroup_setup_root(root, ctx->subsys_mask); - if (ret) + if (!ret) + cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS); + else cgroup_free_root(root); + return ret; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 9ce24d5cf2d5..7d023d42a6a5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1305,6 +1305,20 @@ struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) return root_cgrp->root; } +void cgroup_favor_dynmods(struct cgroup_root *root, bool favor) +{ + bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS; + + /* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */ + if (favor && !favoring) { + rcu_sync_enter(&cgroup_threadgroup_rwsem.rss); + root->flags |= CGRP_ROOT_FAVOR_DYNMODS; + } else if (!favor && favoring) { + rcu_sync_exit(&cgroup_threadgroup_rwsem.rss); + root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; + } +} + static int cgroup_init_root_id(struct cgroup_root *root) { int id; @@ -1365,6 +1379,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_root_count--; } + cgroup_favor_dynmods(root, false); cgroup_exit_root_id(root); mutex_unlock(&cgroup_mutex); @@ -1858,6 +1873,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, enum cgroup2_param { Opt_nsdelegate, Opt_nonsdelegate, + Opt_favordynmods, Opt_nofavordynmods, Opt_memory_localevents, Opt_memory_nolocalevents, Opt_memory_recursiveprot, Opt_memory_norecursiveprot, nr__cgroup2_params @@ -1866,6 +1882,8 @@ enum cgroup2_param { static const struct fs_parameter_spec cgroup2_fs_parameters[] = { fsparam_flag("nsdelegate", Opt_nsdelegate), fsparam_flag("nonsdelegate", Opt_nonsdelegate), + fsparam_flag("favordynmods", Opt_favordynmods), + fsparam_flag("nofavordynmods", Opt_nofavordynmods), fsparam_flag("memory_localevents", Opt_memory_localevents), fsparam_flag("memory_nolocalevents", Opt_memory_nolocalevents), fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot), @@ -1890,6 +1908,12 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param case Opt_nonsdelegate: ctx->flags &= ~CGRP_ROOT_NS_DELEGATE; return 0; + case Opt_favordynmods: + ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; + return 0; + case Opt_nofavordynmods: + ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; + return 0; case Opt_memory_localevents: ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS; return 0; @@ -1914,6 +1938,9 @@ static void apply_cgroup_root_flags(unsigned int root_flags) else cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE; + cgroup_favor_dynmods(&cgrp_dfl_root, + root_flags & CGRP_ROOT_FAVOR_DYNMODS); + if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS; else @@ -1930,6 +1957,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root { if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) seq_puts(seq, ",nsdelegate"); + if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS) + seq_puts(seq, ",favordynmods"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) seq_puts(seq, ",memory_localevents"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT) @@ -1980,7 +2009,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) cgrp->root = root; init_cgroup_housekeeping(cgrp); - root->flags = ctx->flags; + /* DYNMODS must be modified through cgroup_favor_dynmods() */ + root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS; if (ctx->release_agent) strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); if (ctx->name) @@ -2202,6 +2232,10 @@ static int cgroup_init_fs_context(struct fs_context *fc) put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->ns->user_ns); fc->global = true; + +#ifdef CONFIG_CGROUP_FAVOR_DYNMODS + ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; +#endif return 0; } @@ -5854,12 +5888,6 @@ int __init cgroup_init(void) cgroup_rstat_boot(); - /* - * The latency of the synchronize_rcu() is too high for cgroups, - * avoid it at the cost of forcing all readers into the slow path. - */ - rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); - get_user_ns(init_cgroup_ns.user_ns); mutex_lock(&cgroup_mutex); @@ -6771,6 +6799,7 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr, { return snprintf(buf, PAGE_SIZE, "nsdelegate\n" + "favordynmods\n" "memory_localevents\n" "memory_recursiveprot\n"); }