From a60707d74bd1d119cf7bcc5101cda912fc46d5e3 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:57 +0800 Subject: [PATCH 01/28] sched: Move child_runs_first sysctls to fair.c move child_runs_first sysctls to fair.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 2 -- kernel/sched/fair.c | 19 +++++++++++++++++++ kernel/sched/sched.h | 2 ++ kernel/sysctl.c | 7 ------- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c1076b5e17fb..1d2ff3cd1728 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -14,8 +14,6 @@ extern unsigned long sysctl_hung_task_timeout_secs; enum { sysctl_hung_task_timeout_secs = 0 }; #endif -extern unsigned int sysctl_sched_child_runs_first; - enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_LOG, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d4bd299d67ab..788b1d6a3248 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -109,6 +109,25 @@ static unsigned int sched_nr_latency = 8; * parent will (try to) run first. */ unsigned int sysctl_sched_child_runs_first __read_mostly; +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_child_runs_first_sysctls[] = { + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +static int __init sched_child_runs_first_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_child_runs_first_sysctls); + return 0; +} +late_initcall(sched_child_runs_first_sysctl_init); +#endif /* * SCHED_OTHER wake-up granularity. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 58263f90c559..767fc1de9646 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -108,6 +108,8 @@ extern __read_mostly int scheduler_running; extern unsigned long calc_load_update; extern atomic_long_t calc_load_tasks; +extern unsigned int sysctl_sched_child_runs_first; + extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 830aaf8ca08e..6bbb8e1af675 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1659,13 +1659,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", From f5ef06d58be8311a9425e6a54a053ecb350952f3 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:58 +0800 Subject: [PATCH 02/28] sched: Move schedstats sysctls to core.c move schedstats sysctls to core.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 2 -- kernel/sched/core.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 11 ----------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 1d2ff3cd1728..6c7a6850559b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -64,8 +64,6 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern unsigned int sysctl_sched_energy_aware; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d575b4914925..04440da5955f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4430,7 +4430,7 @@ out: __setup("schedstats=", setup_schedstats); #ifdef CONFIG_PROC_SYSCTL -int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, +static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -4449,6 +4449,26 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, set_schedstats(state); return err; } + +static struct ctl_table sched_schedstats_sysctls[] = { + { + .procname = "sched_schedstats", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_schedstats, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sched_schedstats_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_schedstats_sysctls); + return 0; +} +late_initcall(sched_schedstats_sysctl_init); #endif /* CONFIG_PROC_SYSCTL */ #endif /* CONFIG_SCHEDSTATS */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6bbb8e1af675..fc0eeca20718 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1659,17 +1659,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { -#ifdef CONFIG_SCHEDSTATS - { - .procname = "sched_schedstats", - .data = NULL, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_schedstats, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_TASK_DELAY_ACCT { .procname = "task_delayacct", From d9ab0e63fa7f8405fbb19e28c5191e0880a7f2db Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:59 +0800 Subject: [PATCH 03/28] sched: Move rt_period/runtime sysctls to rt.c move rt_period/runtime sysctls to rt.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 11 --------- kernel/rcu/rcu.h | 2 ++ kernel/sched/core.c | 13 ----------- kernel/sched/rt.c | 43 +++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 4 ++++ kernel/sysctl.c | 14 ------------ 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 6c7a6850559b..4391c1307945 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,15 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -/* - * control realtime throttling: - * - * /proc/sys/kernel/sched_rt_period_us - * /proc/sys/kernel/sched_rt_runtime_us - */ -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - extern unsigned int sysctl_sched_dl_period_max; extern unsigned int sysctl_sched_dl_period_min; @@ -58,8 +49,6 @@ extern int sched_rr_timeslice; int sched_rr_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int sched_rt_handler(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 24b5f2c2de87..7812c740b3bf 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -23,6 +23,8 @@ #define RCU_SEQ_CTR_SHIFT 2 #define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) +extern int sysctl_sched_rt_runtime; + /* * Return the counter portion of a sequence number previously returned * by rcu_seq_snap() or rcu_seq_current(). diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 04440da5955f..774f3229db37 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -145,12 +145,6 @@ const_debug unsigned int sysctl_sched_nr_migrate = 8; const_debug unsigned int sysctl_sched_nr_migrate = 32; #endif -/* - * period over which we measure -rt task CPU usage in us. - * default: 1s - */ -unsigned int sysctl_sched_rt_period = 1000000; - __read_mostly int scheduler_running; #ifdef CONFIG_SCHED_CORE @@ -444,13 +438,6 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { } #endif /* CONFIG_SCHED_CORE */ -/* - * part of the period that we allow rt tasks to run in us. - * default: 0.95s - */ -int sysctl_sched_rt_runtime = 950000; - - /* * Serialization rules: * diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a32c46889af8..5663bb5ff890 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -13,6 +13,47 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; +/* + * period over which we measure -rt task CPU usage in us. + * default: 1s + */ +unsigned int sysctl_sched_rt_period = 1000000; + +/* + * part of the period that we allow rt tasks to run in us. + * default: 0.95s + */ +int sysctl_sched_rt_runtime = 950000; + +static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_rt_sysctls[] = { + { + .procname = "sched_rt_period_us", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_rt_handler, + }, + { + .procname = "sched_rt_runtime_us", + .data = &sysctl_sched_rt_runtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rt_handler, + }, + {} +}; + +static int __init sched_rt_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_rt_sysctls); + return 0; +} +late_initcall(sched_rt_sysctl_init); +#endif + static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) { struct rt_bandwidth *rt_b = @@ -2925,7 +2966,7 @@ static void sched_rt_do_global(void) raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } -int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_period, old_runtime; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 767fc1de9646..3b406c78a8e9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -114,6 +114,10 @@ extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); extern void call_trace_sched_update_nr_running(struct rq *rq, int count); + +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + /* * Helpers for converting nanosecond timing to jiffy resolution */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fc0eeca20718..029bfe06c68d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,20 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_rt_period_us", - .data = &sysctl_sched_rt_period, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_rt_handler, - }, - { - .procname = "sched_rt_runtime_us", - .data = &sysctl_sched_rt_runtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sched_rt_handler, - }, { .procname = "sched_deadline_period_max_us", .data = &sysctl_sched_dl_period_max, From 84227c12888b1105725cd2de14705b029bcbb4b2 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:00 +0800 Subject: [PATCH 04/28] sched: Move deadline_period sysctls to deadline.c move deadline_period sysctls to deadline.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 3 --- kernel/sched/deadline.c | 42 +++++++++++++++++++++++++++++------- kernel/sysctl.c | 14 ------------ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 4391c1307945..7da9b94c5e1c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,9 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -extern unsigned int sysctl_sched_dl_period_max; -extern unsigned int sysctl_sched_dl_period_min; - #ifdef CONFIG_UCLAMP_TASK extern unsigned int sysctl_sched_uclamp_util_min; extern unsigned int sysctl_sched_uclamp_util_max; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb4255ae0b2c..82e10b74a7b2 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -16,6 +16,40 @@ * Fabio Checconi */ +/* + * Default limits for DL period; on the top end we guard against small util + * tasks still getting ridiculously long effective runtimes, on the bottom end we + * guard against timer DoS. + */ +static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ +static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_dl_sysctls[] = { + { + .procname = "sched_deadline_period_max_us", + .data = &sysctl_sched_dl_period_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_deadline_period_min_us", + .data = &sysctl_sched_dl_period_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +static int __init sched_dl_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_dl_sysctls); + return 0; +} +late_initcall(sched_dl_sysctl_init); +#endif + static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) { return container_of(dl_se, struct task_struct, dl); @@ -2879,14 +2913,6 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr) attr->sched_flags |= dl_se->flags; } -/* - * Default limits for DL period; on the top end we guard against small util - * tasks still getting ridiculously long effective runtimes, on the bottom end we - * guard against timer DoS. - */ -unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ -unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ - /* * This function validates the new parameters of a -deadline task. * We ask for the deadline not being zero, and greater or equal diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 029bfe06c68d..2c8c75e11a37 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,20 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_deadline_period_max_us", - .data = &sysctl_sched_dl_period_max, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sched_deadline_period_min_us", - .data = &sysctl_sched_dl_period_min, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_rr_timeslice_ms", .data = &sysctl_sched_rr_timeslice, From dafd7a9dad22fadcb290b24dff54e2eae3b89776 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:01 +0800 Subject: [PATCH 05/28] sched: Move rr_timeslice sysctls to rt.c move rr_timeslice sysctls to rt.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 5 ----- kernel/sched/rt.c | 13 +++++++++++-- kernel/sched/sched.h | 1 + kernel/sysctl.c | 7 ------- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 7da9b94c5e1c..3d307e512d1f 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -41,11 +41,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -extern int sysctl_sched_rr_timeslice; -extern int sched_rr_timeslice; - -int sched_rr_handler(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5663bb5ff890..71791be36065 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -5,7 +5,7 @@ */ int sched_rr_timeslice = RR_TIMESLICE; -int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; +static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; /* More than 4 hours if BW_SHIFT equals 20. */ static const u64 max_rt_runtime = MAX_BW; @@ -27,6 +27,8 @@ int sysctl_sched_rt_runtime = 950000; static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #ifdef CONFIG_SYSCTL static struct ctl_table sched_rt_sysctls[] = { { @@ -43,6 +45,13 @@ static struct ctl_table sched_rt_sysctls[] = { .mode = 0644, .proc_handler = sched_rt_handler, }, + { + .procname = "sched_rr_timeslice_ms", + .data = &sysctl_sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, {} }; @@ -3005,7 +3014,7 @@ undo: return ret; } -int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3b406c78a8e9..ae0f6e5a76f9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -117,6 +117,7 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count); extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +extern int sched_rr_timeslice; /* * Helpers for converting nanosecond timing to jiffy resolution diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c8c75e11a37..b074f70a3e11 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,13 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_rr_timeslice_ms", - .data = &sysctl_sched_rr_timeslice, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sched_rr_handler, - }, #ifdef CONFIG_UCLAMP_TASK { .procname = "sched_util_clamp_min", From 28f152cd0926596e69d412467b11b6fe6fe4e864 Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Fri, 18 Mar 2022 10:54:17 +0800 Subject: [PATCH 06/28] sched/rt: fix build error when CONFIG_SYSCTL is disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid random build errors which do not select CONFIG_SYSCTL by depending on it in Kconfig. This fixes the following warning: In file included from kernel/sched/build_policy.c:43: At top level: kernel/sched/rt.c:3017:12: error: ‘sched_rr_handler’ defined but not used [-Werror=unused-function] 3017 | static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, | ^~~~~~~~~~~~~~~~ kernel/sched/rt.c:2978:12: error: ‘sched_rt_handler’ defined but not used [-Werror=unused-function] 2978 | static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, | ^~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make[2]: *** [scripts/Makefile.build:310: kernel/sched/build_policy.o] Error 1 make[1]: *** [scripts/Makefile.build:638: kernel/sched] Error 2 make[1]: *** Waiting for unfinished jobs.... Reported-by: Hulk Robot Signed-off-by: Baisong Zhong [mcgrof: small build fix, we need sched_rt_can_attach() even when CONFIG_SYSCTL is disabled] Signed-off-by: Luis Chamberlain --- kernel/sched/rt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 71791be36065..b491a0f8c25d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -5,7 +5,6 @@ */ int sched_rr_timeslice = RR_TIMESLICE; -static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; /* More than 4 hours if BW_SHIFT equals 20. */ static const u64 max_rt_runtime = MAX_BW; @@ -25,11 +24,12 @@ unsigned int sysctl_sched_rt_period = 1000000; */ int sysctl_sched_rt_runtime = 950000; +#ifdef CONFIG_SYSCTL +static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -#ifdef CONFIG_SYSCTL static struct ctl_table sched_rt_sysctls[] = { { .procname = "sched_rt_period_us", @@ -2911,6 +2911,7 @@ long sched_group_rt_period(struct task_group *tg) return rt_period_us; } +#ifdef CONFIG_SYSCTL static int sched_rt_global_constraints(void) { int ret = 0; @@ -2921,6 +2922,7 @@ static int sched_rt_global_constraints(void) return ret; } +#endif /* CONFIG_SYSCTL */ int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) { @@ -2932,6 +2934,8 @@ int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) } #else /* !CONFIG_RT_GROUP_SCHED */ + +#ifdef CONFIG_SYSCTL static int sched_rt_global_constraints(void) { unsigned long flags; @@ -2949,8 +2953,10 @@ static int sched_rt_global_constraints(void) return 0; } +#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_RT_GROUP_SCHED */ +#ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { if (sysctl_sched_rt_period <= 0) @@ -3035,6 +3041,7 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, return ret; } +#endif /* CONFIG_SYSCTL */ #ifdef CONFIG_SCHED_DEBUG void print_rt_stats(struct seq_file *m, int cpu) From 3267e0156c3341ac25b37a0f60551cdae1634b60 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:02 +0800 Subject: [PATCH 07/28] sched: Move uclamp_util sysctls to core.c move uclamp_util sysctls to core.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 8 ------ kernel/sched/core.c | 48 +++++++++++++++++++++++++++--------- kernel/sysctl.c | 23 ----------------- 3 files changed, 37 insertions(+), 42 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3d307e512d1f..0934b21a57a4 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,18 +31,10 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -#ifdef CONFIG_UCLAMP_TASK -extern unsigned int sysctl_sched_uclamp_util_min; -extern unsigned int sysctl_sched_uclamp_util_max; -extern unsigned int sysctl_sched_uclamp_util_min_rt_default; -#endif - #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 774f3229db37..ef31751c5799 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1306,10 +1306,10 @@ static void set_load_weight(struct task_struct *p, bool update_load) static DEFINE_MUTEX(uclamp_mutex); /* Max allowed minimum utilization */ -unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; /* Max allowed maximum utilization */ -unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; /* * By default RT tasks run at the maximum performance point/capacity of the @@ -1326,7 +1326,7 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; * This knob will not override the system default sched_util_clamp_min defined * above. */ -unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; /* All clamps are required to be less or equal than these values */ static struct uclamp_se uclamp_default[UCLAMP_CNT]; @@ -1779,7 +1779,7 @@ static void uclamp_update_root_tg(void) static void uclamp_update_root_tg(void) { } #endif -int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, +static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { bool update_root_tg = false; @@ -4436,8 +4436,12 @@ static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, set_schedstats(state); return err; } +#endif /* CONFIG_PROC_SYSCTL */ +#endif /* CONFIG_SCHEDSTATS */ -static struct ctl_table sched_schedstats_sysctls[] = { +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_core_sysctls[] = { +#ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", .data = NULL, @@ -4447,17 +4451,39 @@ static struct ctl_table sched_schedstats_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif /* CONFIG_SCHEDSTATS */ +#ifdef CONFIG_UCLAMP_TASK + { + .procname = "sched_util_clamp_min", + .data = &sysctl_sched_uclamp_util_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, + { + .procname = "sched_util_clamp_max", + .data = &sysctl_sched_uclamp_util_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, + { + .procname = "sched_util_clamp_min_rt_default", + .data = &sysctl_sched_uclamp_util_min_rt_default, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, +#endif /* CONFIG_UCLAMP_TASK */ {} }; - -static int __init sched_schedstats_sysctl_init(void) +static int __init sched_core_sysctl_init(void) { - register_sysctl_init("kernel", sched_schedstats_sysctls); + register_sysctl_init("kernel", sched_core_sysctls); return 0; } -late_initcall(sched_schedstats_sysctl_init); -#endif /* CONFIG_PROC_SYSCTL */ -#endif /* CONFIG_SCHEDSTATS */ +late_initcall(sched_core_sysctl_init); +#endif /* CONFIG_SYSCTL */ /* * fork()/clone()-time setup: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b074f70a3e11..a48c090d57f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,29 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_UCLAMP_TASK - { - .procname = "sched_util_clamp_min", - .data = &sysctl_sched_uclamp_util_min, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, - { - .procname = "sched_util_clamp_max", - .data = &sysctl_sched_uclamp_util_max, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, - { - .procname = "sched_util_clamp_min_rt_default", - .data = &sysctl_sched_uclamp_util_min_rt_default, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, -#endif #ifdef CONFIG_CFS_BANDWIDTH { .procname = "sched_cfs_bandwidth_slice_us", From d4ae80ffa64f87b9c355692b680b603add084e96 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:03 +0800 Subject: [PATCH 08/28] sched: Move cfs_bandwidth_slice sysctls to fair.c move cfs_bandwidth_slice sysctls to fair.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 4 --- kernel/sched/fair.c | 51 ++++++++++++++++++++++-------------- kernel/sysctl.c | 10 ------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0934b21a57a4..198f77c8a873 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,10 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -#ifdef CONFIG_CFS_BANDWIDTH -extern unsigned int sysctl_sched_cfs_bandwidth_slice; -#endif - int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 788b1d6a3248..265bf7a75a37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -109,25 +109,6 @@ static unsigned int sched_nr_latency = 8; * parent will (try to) run first. */ unsigned int sysctl_sched_child_runs_first __read_mostly; -#ifdef CONFIG_SYSCTL -static struct ctl_table sched_child_runs_first_sysctls[] = { - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; - -static int __init sched_child_runs_first_sysctl_init(void) -{ - register_sysctl_init("kernel", sched_child_runs_first_sysctls); - return 0; -} -late_initcall(sched_child_runs_first_sysctl_init); -#endif /* * SCHED_OTHER wake-up granularity. @@ -192,7 +173,37 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ -unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +#endif + +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_fair_sysctls[] = { + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_CFS_BANDWIDTH + { + .procname = "sched_cfs_bandwidth_slice_us", + .data = &sysctl_sched_cfs_bandwidth_slice, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, +#endif + {} +}; + +static int __init sched_fair_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_fair_sysctls); + return 0; +} +late_initcall(sched_fair_sysctl_init); #endif static inline void update_load_add(struct load_weight *lw, unsigned long inc) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a48c090d57f9..10ab81c7c457 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,16 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_CFS_BANDWIDTH - { - .procname = "sched_cfs_bandwidth_slice_us", - .data = &sysctl_sched_cfs_bandwidth_slice, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - }, -#endif #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) { .procname = "sched_energy_aware", From 8a0441415b3f9b9a920a6a5086580ea3daa7b884 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:04 +0800 Subject: [PATCH 09/28] sched: Move energy_aware sysctls to topology.c move energy_aware sysctls to topology.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 6 ------ kernel/sched/topology.c | 25 +++++++++++++++++++++++-- kernel/sysctl.c | 11 ----------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 198f77c8a873..e650946816d0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -34,10 +34,4 @@ extern int sysctl_numa_balancing_mode; int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -extern unsigned int sysctl_sched_energy_aware; -int sched_energy_aware_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -#endif - #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 810750e62118..05b6c2ad90b9 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -206,7 +206,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) DEFINE_STATIC_KEY_FALSE(sched_energy_present); -unsigned int sysctl_sched_energy_aware = 1; +static unsigned int sysctl_sched_energy_aware = 1; DEFINE_MUTEX(sched_energy_mutex); bool sched_energy_update; @@ -220,7 +220,7 @@ void rebuild_sched_domains_energy(void) } #ifdef CONFIG_PROC_SYSCTL -int sched_energy_aware_handler(struct ctl_table *table, int write, +static int sched_energy_aware_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, state; @@ -237,6 +237,27 @@ int sched_energy_aware_handler(struct ctl_table *table, int write, return ret; } + +static struct ctl_table sched_energy_aware_sysctls[] = { + { + .procname = "sched_energy_aware", + .data = &sysctl_sched_energy_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_energy_aware_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sched_energy_aware_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_energy_aware_sysctls); + return 0; +} + +late_initcall(sched_energy_aware_sysctl_init); #endif static void free_pd(struct perf_domain *pd) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 10ab81c7c457..8241c5401ee8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,17 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) - { - .procname = "sched_energy_aware", - .data = &sysctl_sched_energy_aware, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_energy_aware_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", From 06d177662fb86b80c7fc2290667b9a14cb0bd925 Mon Sep 17 00:00:00 2001 From: tangmeng Date: Thu, 17 Feb 2022 12:23:21 +0800 Subject: [PATCH 10/28] kernel/reboot: move reboot sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the poweroff_cmd and ctrl-alt-del sysctls to its own file, kernel/reboot.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/reboot.h | 4 ---- kernel/reboot.c | 34 ++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 14 -------------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index af907a3d68d1..a2429648d831 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -71,12 +71,8 @@ extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); -extern int C_A_D; /* for sysctl */ void ctrl_alt_del(void); -#define POWEROFF_CMD_PATH_LEN 256 -extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; - extern void orderly_poweroff(bool force); extern void orderly_reboot(void); void hw_protection_shutdown(const char *reason, int ms_until_forced); diff --git a/kernel/reboot.c b/kernel/reboot.c index 6bcc5d6a6572..ed4e6dfb7d44 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -23,7 +23,7 @@ * this indicates whether you can reboot with ctrl-alt-del: the default is yes */ -int C_A_D = 1; +static int C_A_D = 1; struct pid *cad_pid; EXPORT_SYMBOL(cad_pid); @@ -417,9 +417,37 @@ void ctrl_alt_del(void) kill_cad_pid(SIGINT, 1); } -char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; +#define POWEROFF_CMD_PATH_LEN 256 +static char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; static const char reboot_cmd[] = "/sbin/reboot"; +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_reboot_table[] = { + { + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = proc_dostring, + }, + { + .procname = "ctrl-alt-del", + .data = &C_A_D, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static void __init kernel_reboot_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_reboot_table); +} +#else +#define kernel_reboot_sysctls_init() do { } while (0) +#endif /* CONFIG_SYSCTL */ + static int run_cmd(const char *cmd) { char **argv; @@ -886,6 +914,8 @@ static int __init reboot_ksysfs_init(void) return ret; } + kernel_reboot_sysctls_init(); + return 0; } late_initcall(reboot_ksysfs_init); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8241c5401ee8..5e43569ce2be 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1798,13 +1798,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "ctrl-alt-del", - .data = &C_A_D, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_FUNCTION_TRACER { .procname = "ftrace_enabled", @@ -2111,13 +2104,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "poweroff_cmd", - .data = &poweroff_cmd, - .maxlen = POWEROFF_CMD_PATH_LEN, - .mode = 0644, - .proc_handler = proc_dostring, - }, #ifdef CONFIG_KEYS { .procname = "keys", From 43fe219aa56a2fdd8f0623c9470a32b14b0617a5 Mon Sep 17 00:00:00 2001 From: sujiaxun Date: Thu, 17 Feb 2022 18:51:48 -0800 Subject: [PATCH 11/28] mm: move oom_kill sysctls to their own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the oom_kill sysctls to their own file, mm/oom_kill.c [sfr@canb.auug.org.au: null-terminate the array] Link: https://lkml.kernel.org/r/20220216193202.28838626@canb.auug.org.au Link: https://lkml.kernel.org/r/20220215093203.31032-1-sujiaxun@uniontech.com Signed-off-by: sujiaxun Signed-off-by: Stephen Rothwell Cc: Kees Cook Cc: Iurii Zaikin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Luis Chamberlain --- include/linux/oom.h | 4 ---- kernel/sysctl.c | 23 ----------------------- mm/oom_kill.c | 38 +++++++++++++++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 2db9a1432511..02d1e7bbd8cd 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -123,8 +123,4 @@ extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); -/* sysctls */ -extern int sysctl_oom_dump_tasks; -extern int sysctl_oom_kill_allocating_task; -extern int sysctl_panic_on_oom; #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5e43569ce2be..a21c0ea396f3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2241,29 +2241,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { - .procname = "panic_on_oom", - .data = &sysctl_panic_on_oom, - .maxlen = sizeof(sysctl_panic_on_oom), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, - }, - { - .procname = "oom_kill_allocating_task", - .data = &sysctl_oom_kill_allocating_task, - .maxlen = sizeof(sysctl_oom_kill_allocating_task), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "oom_dump_tasks", - .data = &sysctl_oom_dump_tasks, - .maxlen = sizeof(sysctl_oom_dump_tasks), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "overcommit_ratio", .data = &sysctl_overcommit_ratio, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 7ec38194f8e1..7cc338a9e9e4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -52,9 +52,38 @@ #define CREATE_TRACE_POINTS #include -int sysctl_panic_on_oom; -int sysctl_oom_kill_allocating_task; -int sysctl_oom_dump_tasks = 1; +static int sysctl_panic_on_oom; +static int sysctl_oom_kill_allocating_task; +static int sysctl_oom_dump_tasks = 1; + +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_oom_kill_table[] = { + { + .procname = "panic_on_oom", + .data = &sysctl_panic_on_oom, + .maxlen = sizeof(sysctl_panic_on_oom), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "oom_kill_allocating_task", + .data = &sysctl_oom_kill_allocating_task, + .maxlen = sizeof(sysctl_oom_kill_allocating_task), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "oom_dump_tasks", + .data = &sysctl_oom_dump_tasks, + .maxlen = sizeof(sysctl_oom_dump_tasks), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +#endif /* * Serializes oom killer invocations (out_of_memory()) from all contexts to @@ -677,6 +706,9 @@ static void wake_oom_reaper(struct task_struct *tsk) static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_oom_kill_table); +#endif return 0; } subsys_initcall(oom_init) From aa779e5102195e1d9ade95dcbc0bfbd8f916eb59 Mon Sep 17 00:00:00 2001 From: zhanglianjie Date: Thu, 17 Feb 2022 18:51:51 -0800 Subject: [PATCH 12/28] mm: move page-writeback sysctls to their own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the page-writeback sysctls to its own file. [akpm@linux-foundation.org: coding-style cleanups] akpm@linux-foundation.org: fix CONFIG_SYSCTL=n warnings] Link: https://lkml.kernel.org/r/20220129012955.26594-1-zhanglianjie@uniontech.com Signed-off-by: zhanglianjie Cc: Kees Cook Cc: Iurii Zaikin Cc: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Luis Chamberlain --- include/linux/writeback.h | 15 ------ kernel/sysctl.c | 69 ------------------------- mm/page-writeback.c | 103 ++++++++++++++++++++++++++++++++++---- 3 files changed, 93 insertions(+), 94 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fec248ab1fec..dc2b94e6a94f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -345,28 +345,13 @@ void wb_domain_exit(struct wb_domain *dom); extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ -extern int dirty_background_ratio; -extern unsigned long dirty_background_bytes; -extern int vm_dirty_ratio; -extern unsigned long vm_dirty_bytes; extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; -extern int vm_highmem_is_dirtyable; extern int laptop_mode; -int dirty_background_ratio_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_background_bytes_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_ratio_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_bytes_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a21c0ea396f3..36bfe1c92d44 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -100,8 +100,6 @@ static const int six_hundred_forty_kb = 640 * 1024; #endif -/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; static const int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; @@ -2263,55 +2261,6 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, - { - .procname = "dirty_background_ratio", - .data = &dirty_background_ratio, - .maxlen = sizeof(dirty_background_ratio), - .mode = 0644, - .proc_handler = dirty_background_ratio_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "dirty_background_bytes", - .data = &dirty_background_bytes, - .maxlen = sizeof(dirty_background_bytes), - .mode = 0644, - .proc_handler = dirty_background_bytes_handler, - .extra1 = SYSCTL_LONG_ONE, - }, - { - .procname = "dirty_ratio", - .data = &vm_dirty_ratio, - .maxlen = sizeof(vm_dirty_ratio), - .mode = 0644, - .proc_handler = dirty_ratio_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "dirty_bytes", - .data = &vm_dirty_bytes, - .maxlen = sizeof(vm_dirty_bytes), - .mode = 0644, - .proc_handler = dirty_bytes_handler, - .extra1 = (void *)&dirty_bytes_min, - }, - { - .procname = "dirty_writeback_centisecs", - .data = &dirty_writeback_interval, - .maxlen = sizeof(dirty_writeback_interval), - .mode = 0644, - .proc_handler = dirty_writeback_centisecs_handler, - }, - { - .procname = "dirty_expire_centisecs", - .data = &dirty_expire_interval, - .maxlen = sizeof(dirty_expire_interval), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, { .procname = "dirtytime_expire_seconds", .data = &dirtytime_expire_interval, @@ -2483,13 +2432,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif - { - .procname = "laptop_mode", - .data = &laptop_mode, - .maxlen = sizeof(laptop_mode), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "vfs_cache_pressure", .data = &sysctl_vfs_cache_pressure, @@ -2587,17 +2529,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif -#ifdef CONFIG_HIGHMEM - { - .procname = "highmem_is_dirtyable", - .data = &vm_highmem_is_dirtyable, - .maxlen = sizeof(vm_highmem_is_dirtyable), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_MEMORY_FAILURE { .procname = "memory_failure_early_kill", diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7e2da284e427..438762173a59 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -70,30 +70,33 @@ static long ratelimit_pages = 32; /* * Start background writeback (via writeback threads) at this percentage */ -int dirty_background_ratio = 10; +static int dirty_background_ratio = 10; /* * dirty_background_bytes starts at 0 (disabled) so that it is a function of * dirty_background_ratio * the amount of dirtyable memory */ -unsigned long dirty_background_bytes; +static unsigned long dirty_background_bytes; /* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true */ -int vm_highmem_is_dirtyable; +static int vm_highmem_is_dirtyable; /* * The generator of dirty data starts writeback at this percentage */ -int vm_dirty_ratio = 20; +static int vm_dirty_ratio = 20; + +/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; /* * vm_dirty_bytes starts at 0 (disabled) so that it is a function of * vm_dirty_ratio * the amount of dirtyable memory */ -unsigned long vm_dirty_bytes; +static unsigned long vm_dirty_bytes; /* * The interval between `kupdate'-style writebacks @@ -491,7 +494,8 @@ bool node_dirty_ok(struct pglist_data *pgdat) return nr_pages <= limit; } -int dirty_background_ratio_handler(struct ctl_table *table, int write, +#ifdef CONFIG_SYSCTL +static int dirty_background_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -502,7 +506,7 @@ int dirty_background_ratio_handler(struct ctl_table *table, int write, return ret; } -int dirty_background_bytes_handler(struct ctl_table *table, int write, +static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -513,7 +517,7 @@ int dirty_background_bytes_handler(struct ctl_table *table, int write, return ret; } -int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, +static int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; @@ -527,7 +531,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, return ret; } -int dirty_bytes_handler(struct ctl_table *table, int write, +static int dirty_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; @@ -540,6 +544,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write, } return ret; } +#endif static unsigned long wp_next_time(unsigned long cur_time) { @@ -1981,10 +1986,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) return false; } +#ifdef CONFIG_SYSCTL /* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ -int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, +static int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { unsigned int old_interval = dirty_writeback_interval; @@ -2005,6 +2011,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, return ret; } +#endif void laptop_mode_timer_fn(struct timer_list *t) { @@ -2069,6 +2076,79 @@ static int page_writeback_cpu_online(unsigned int cpu) return 0; } +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_page_writeback_sysctls[] = { + { + .procname = "dirty_background_ratio", + .data = &dirty_background_ratio, + .maxlen = sizeof(dirty_background_ratio), + .mode = 0644, + .proc_handler = dirty_background_ratio_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "dirty_background_bytes", + .data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = dirty_background_bytes_handler, + .extra1 = SYSCTL_LONG_ONE, + }, + { + .procname = "dirty_ratio", + .data = &vm_dirty_ratio, + .maxlen = sizeof(vm_dirty_ratio), + .mode = 0644, + .proc_handler = dirty_ratio_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = dirty_bytes_handler, + .extra1 = (void *)&dirty_bytes_min, + }, + { + .procname = "dirty_writeback_centisecs", + .data = &dirty_writeback_interval, + .maxlen = sizeof(dirty_writeback_interval), + .mode = 0644, + .proc_handler = dirty_writeback_centisecs_handler, + }, + { + .procname = "dirty_expire_centisecs", + .data = &dirty_expire_interval, + .maxlen = sizeof(dirty_expire_interval), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +#ifdef CONFIG_HIGHMEM + { + .procname = "highmem_is_dirtyable", + .data = &vm_highmem_is_dirtyable, + .maxlen = sizeof(vm_highmem_is_dirtyable), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "laptop_mode", + .data = &laptop_mode, + .maxlen = sizeof(laptop_mode), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + {} +}; +#endif + /* * Called early on to tune the page writeback dirty limits. * @@ -2093,6 +2173,9 @@ void __init page_writeback_init(void) page_writeback_cpu_online, NULL); cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL, page_writeback_cpu_online); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_page_writeback_sysctls); +#endif } /** From f79c9b8ae8bde10126586c1bb55b5fd027276d8e Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:58:57 +0800 Subject: [PATCH 13/28] kernel/lockdep: move lockdep sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the prove_locking and lock_stat sysctls to its own file, kernel/lockdep.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/lockdep.h | 4 ---- kernel/locking/lockdep.c | 35 +++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 21 --------------------- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..37951c17908e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -16,10 +16,6 @@ struct task_struct; -/* for sysctl */ -extern int prove_locking; -extern int lock_stat; - #ifdef CONFIG_LOCKDEP #include diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c06cab6546ed..a4382ae1be59 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -64,19 +64,50 @@ #include #ifdef CONFIG_PROVE_LOCKING -int prove_locking = 1; +static int prove_locking = 1; module_param(prove_locking, int, 0644); #else #define prove_locking 0 #endif #ifdef CONFIG_LOCK_STAT -int lock_stat = 1; +static int lock_stat = 1; module_param(lock_stat, int, 0644); #else #define lock_stat 0 #endif +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_lockdep_table[] = { +#ifdef CONFIG_PROVE_LOCKING + { + .procname = "prove_locking", + .data = &prove_locking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_PROVE_LOCKING */ +#ifdef CONFIG_LOCK_STAT + { + .procname = "lock_stat", + .data = &lock_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_LOCK_STAT */ + { } +}; + +static __init int kernel_lockdep_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_lockdep_table); + return 0; +} +late_initcall(kernel_lockdep_sysctls_init); +#endif /* CONFIG_SYSCTL */ + DEFINE_PER_CPU(unsigned int, lockdep_recursion); EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 36bfe1c92d44..95380d250c8c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -88,9 +88,6 @@ #ifdef CONFIG_RT_MUTEXES #include #endif -#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) -#include -#endif #if defined(CONFIG_SYSCTL) @@ -1679,24 +1676,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", - .data = &prove_locking, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif -#ifdef CONFIG_LOCK_STAT - { - .procname = "lock_stat", - .data = &lock_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif { .procname = "panic", .data = &panic_timeout, From 9df918698408fd914493aba0b7858fef50eba63a Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:12 +0800 Subject: [PATCH 14/28] kernel/panic: move panic sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the oops_all_cpu_backtrace sysctl to its own file, kernel/panic.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/panic.h | 6 ------ kernel/panic.c | 26 +++++++++++++++++++++++++- kernel/sysctl.c | 11 ----------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index f5844908a089..e71161da69c4 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -15,12 +15,6 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); -#ifdef CONFIG_SMP -extern unsigned int sysctl_oops_all_cpu_backtrace; -#else -#define sysctl_oops_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/kernel/panic.c b/kernel/panic.c index eb4dfb932c85..eb3f2fe4f6d7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -43,7 +43,9 @@ * Should we dump all CPUs backtraces in an oops event? * Defaults to 0, can be changed via sysctl. */ -unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; @@ -73,6 +75,28 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); +#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +static struct ctl_table kern_panic_table[] = { + { + .procname = "oops_all_cpu_backtrace", + .data = &sysctl_oops_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int kernel_panic_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_panic_table); + return 0; +} +late_initcall(kernel_panic_sysctls_init); +#endif + static long no_blink(int state) { return 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 95380d250c8c..90fc2212b536 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1922,17 +1922,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_SMP - { - .procname = "oops_all_cpu_backtrace", - .data = &sysctl_oops_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ { .procname = "pid_max", .data = &pid_max, From 801b501439d1b366d524dee4fc1e6b3473a95b9a Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:23 +0800 Subject: [PATCH 15/28] kernel/acct: move acct sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the acct sysctl to its own file, kernel/acct.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/acct.h | 1 - kernel/acct.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 12 ------------ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/linux/acct.h b/include/linux/acct.h index bc70e81895c0..2718c4854815 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -21,7 +21,6 @@ #ifdef CONFIG_BSD_PROCESS_ACCT struct pid_namespace; -extern int acct_parm[]; /* for sysctl */ extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); diff --git a/kernel/acct.c b/kernel/acct.c index 3df53cf1dcd5..13706356ec54 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -70,11 +70,31 @@ * Turned into sysctl-controllable parameters. AV, 12/11/98 */ -int acct_parm[3] = {4, 2, 30}; +static int acct_parm[3] = {4, 2, 30}; #define RESUME (acct_parm[0]) /* >foo% free space - resume */ #define SUSPEND (acct_parm[1]) /* #endif -#ifdef CONFIG_BSD_PROCESS_ACCT -#include -#endif #ifdef CONFIG_RT_MUTEXES #include #endif @@ -1856,15 +1853,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dostring, }, #endif -#ifdef CONFIG_BSD_PROCESS_ACCT - { - .procname = "acct", - .data = &acct_parm, - .maxlen = 3*sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_MAGIC_SYSRQ { .procname = "sysrq", From 1186618a6a35d43a865448472a261184b608d13c Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:36 +0800 Subject: [PATCH 16/28] kernel/delayacct: move delayacct sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the delayacct sysctl to its own file, kernel/delayacct.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/delayacct.h | 3 --- kernel/delayacct.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 12 ------------ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 3e03d010bd2e..6b16a6930a19 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -61,9 +61,6 @@ extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); -extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c5e8cea9e05f..2c1e18f7c5cf 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -44,7 +44,7 @@ void delayacct_init(void) } #ifdef CONFIG_PROC_SYSCTL -int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, +static int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int state = delayacct_on; @@ -63,6 +63,26 @@ int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, set_delayacct(state); return err; } + +static struct ctl_table kern_delayacct_table[] = { + { + .procname = "task_delayacct", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_delayacct, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int kernel_delayacct_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_delayacct_table); + return 0; +} +late_initcall(kernel_delayacct_sysctls_init); #endif void __delayacct_tsk_init(struct task_struct *tsk) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5421e28dbb25..9b74ba12a711 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,7 +67,6 @@ #include #include #include -#include #include "../lib/kstrtox.h" @@ -1651,17 +1650,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { -#ifdef CONFIG_TASK_DELAY_ACCT - { - .procname = "task_delayacct", - .data = NULL, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_delayacct, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_TASK_DELAY_ACCT */ #ifdef CONFIG_NUMA_BALANCING { .procname = "numa_balancing", From d772cc2c321900f3f463a124eebeb7218e66dda6 Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:49 +0800 Subject: [PATCH 17/28] kernel/do_mount_initrd: move real_root_dev sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the real_root_dev sysctl to its own file, kernel/do_mount_initrd.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/initrd.h | 2 -- init/do_mounts_initrd.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 9 --------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 1bbe9af48dc3..f1a1f4c92ded 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -29,8 +29,6 @@ static inline void wait_for_initramfs(void) {} extern phys_addr_t phys_initrd_start; extern unsigned long phys_initrd_size; -extern unsigned int real_root_dev; - extern char __initramfs_start[]; extern unsigned long __initramfs_size; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 533d81ed74d4..327962ea354c 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -14,12 +14,32 @@ unsigned long initrd_start, initrd_end; int initrd_below_start_ok; -unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ +static unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ static int __initdata mount_initrd = 1; phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_do_mounts_initrd_table[] = { + { + .procname = "real-root-dev", + .data = &real_root_dev, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static __init int kernel_do_mounts_initrd_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_do_mounts_initrd_table); + return 0; +} +late_initcall(kernel_do_mounts_initrd_sysctls_init); +#endif /* CONFIG_SYSCTL */ + static int __init no_initrd(char *str) { mount_initrd = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9b74ba12a711..10a551f8fcab 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1693,15 +1693,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sysctl_latencytop, }, -#endif -#ifdef CONFIG_BLK_DEV_INITRD - { - .procname = "real-root-dev", - .data = &real_root_dev, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif { .procname = "print-fatal-signals", From 8e4e83b2278bdfb55cb2b13de07cf0a721ce8af7 Mon Sep 17 00:00:00 2001 From: Wei Xiao Date: Wed, 23 Feb 2022 19:11:53 +0800 Subject: [PATCH 18/28] ftrace: move sysctl_ftrace_enabled to ftrace.c This moves ftrace_enabled to trace/ftrace.c. We move sysctls to places where features actually belong to improve the readability of the code and reduce the risk of code merge conflicts. At the same time, the proc-sysctl maintainers do not want to know what sysctl knobs you wish to add for your owner piece of code, we just care about the core logic. Signed-off-by: Wei Xiao Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- include/linux/ftrace.h | 3 --- kernel/sysctl.c | 9 --------- kernel/trace/ftrace.c | 22 +++++++++++++++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..088b915853dd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -101,9 +101,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; -extern int -ftrace_enable_sysctl(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 10a551f8fcab..21172d3dad6e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1751,15 +1751,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_FUNCTION_TRACER - { - .procname = "ftrace_enabled", - .data = &ftrace_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = ftrace_enable_sysctl, - }, -#endif #ifdef CONFIG_STACK_TRACER { .procname = "stack_tracer_enabled", diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f1d2f5e7263..a5efbbc289b4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7921,7 +7921,8 @@ static bool is_permanent_ops_registered(void) return false; } -int +#ifdef CONFIG_SYSCTL +static int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -7964,3 +7965,22 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_lock); return ret; } + +static struct ctl_table ftrace_sysctls[] = { + { + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ftrace_enable_sysctl, + }, + {} +}; + +static int __init ftrace_sysctl_init(void) +{ + register_sysctl_init("kernel", ftrace_sysctls); + return 0; +} +late_initcall(ftrace_sysctl_init); +#endif From 5d79fa0d33258d8e79064316ce8fae37e27bd34b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 7 Apr 2022 15:46:12 +0800 Subject: [PATCH 19/28] ftrace: Fix build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_SYSCTL and CONFIG_DYNAMIC_FTRACE is n, build warns: kernel/trace/ftrace.c:7912:13: error: ‘is_permanent_ops_registered’ defined but not used [-Werror=unused-function] static bool is_permanent_ops_registered(void) ^~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c:89:12: error: ‘last_ftrace_enabled’ defined but not used [-Werror=unused-variable] static int last_ftrace_enabled; ^~~~~~~~~~~~~~~~~~~ Move is_permanent_ops_registered() to ifdef block and mark last_ftrace_enabled as __maybe_unused to fix this. Fixes: 7cde53da38a3 ("ftrace: move sysctl_ftrace_enabled to ftrace.c") Signed-off-by: YueHaibing Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a5efbbc289b4..db8d553728b6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -86,7 +86,7 @@ struct ftrace_ops ftrace_list_end __read_mostly = { /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; -static int last_ftrace_enabled; +static int __maybe_unused last_ftrace_enabled; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; @@ -7909,6 +7909,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) } EXPORT_SYMBOL_GPL(unregister_ftrace_function); +#ifdef CONFIG_SYSCTL static bool is_permanent_ops_registered(void) { struct ftrace_ops *op; @@ -7921,7 +7922,6 @@ static bool is_permanent_ops_registered(void) return false; } -#ifdef CONFIG_SYSCTL static int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) From f8b7d2b4c192118c37ab24c0540d1134dd0104d8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Apr 2022 14:29:57 -0700 Subject: [PATCH 20/28] ftrace: fix building with SYSCTL=n but DYNAMIC_FTRACE=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One can enable dyanmic tracing but disable sysctls. When this is doen we get the compile kernel warning: CC kernel/trace/ftrace.o kernel/trace/ftrace.c:3086:13: warning: ‘ftrace_shutdown_sysctl’ defined but not used [-Wunused-function] 3086 | static void ftrace_shutdown_sysctl(void) | ^~~~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c:3068:13: warning: ‘ftrace_startup_sysctl’ defined but not used [-Wunused-function] 3068 | static void ftrace_startup_sysctl(void) When CONFIG_DYNAMIC_FTRACE=n the ftrace_startup_sysctl() and routines ftrace_shutdown_sysctl() still compiles, so these are actually more just used for when SYSCTL=y. Fix this then by just moving these routines to when sysctls are enabled. Fixes: 7cde53da38a3 ("ftrace: move sysctl_ftrace_enabled to ftrace.c") Reported-by: kernel test robot Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- kernel/trace/ftrace.c | 71 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index db8d553728b6..d9424fd9a183 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3065,40 +3065,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) return 0; } -static void ftrace_startup_sysctl(void) -{ - int command; - - if (unlikely(ftrace_disabled)) - return; - - /* Force update next time */ - saved_ftrace_func = NULL; - /* ftrace_start_up is true if we want ftrace running */ - if (ftrace_start_up) { - command = FTRACE_UPDATE_CALLS; - if (ftrace_graph_active) - command |= FTRACE_START_FUNC_RET; - ftrace_startup_enable(command); - } -} - -static void ftrace_shutdown_sysctl(void) -{ - int command; - - if (unlikely(ftrace_disabled)) - return; - - /* ftrace_start_up is true if ftrace is running */ - if (ftrace_start_up) { - command = FTRACE_DISABLE_CALLS; - if (ftrace_graph_active) - command |= FTRACE_STOP_FUNC_RET; - ftrace_run_update_code(command); - } -} - static u64 ftrace_update_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; @@ -7267,9 +7233,6 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_all(int command) { } -# define ftrace_startup_sysctl() do { } while (0) -# define ftrace_shutdown_sysctl() do { } while (0) - static void ftrace_update_trampoline(struct ftrace_ops *ops) { } @@ -7910,6 +7873,40 @@ int unregister_ftrace_function(struct ftrace_ops *ops) EXPORT_SYMBOL_GPL(unregister_ftrace_function); #ifdef CONFIG_SYSCTL +static void ftrace_startup_sysctl(void) +{ + int command; + + if (unlikely(ftrace_disabled)) + return; + + /* Force update next time */ + saved_ftrace_func = NULL; + /* ftrace_start_up is true if we want ftrace running */ + if (ftrace_start_up) { + command = FTRACE_UPDATE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_START_FUNC_RET; + ftrace_startup_enable(command); + } +} + +static void ftrace_shutdown_sysctl(void) +{ + int command; + + if (unlikely(ftrace_disabled)) + return; + + /* ftrace_start_up is true if ftrace is running */ + if (ftrace_start_up) { + command = FTRACE_DISABLE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_STOP_FUNC_RET; + ftrace_run_update_code(command); + } +} + static bool is_permanent_ops_registered(void) { struct ftrace_ops *op; From 988f11e046401f8561c4afefa506a50f0203de40 Mon Sep 17 00:00:00 2001 From: liaohua Date: Thu, 7 Apr 2022 15:29:48 +0800 Subject: [PATCH 21/28] latencytop: move sysctl to its own file This moves latencytop sysctl to kernel/latencytop.c Signed-off-by: liaohua Signed-off-by: Luis Chamberlain --- include/linux/latencytop.h | 3 --- kernel/latencytop.c | 41 +++++++++++++++++++++++++++----------- kernel/sysctl.c | 10 ---------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index abe3d95f795b..84f1053cf2a8 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -38,9 +38,6 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_tsk_latency_tracing(struct task_struct *p); -int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - #else static inline void diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 166d7bf49666..76166df011a4 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -55,6 +55,7 @@ #include #include #include +#include static DEFINE_RAW_SPINLOCK(latency_lock); @@ -63,6 +64,31 @@ static struct latency_record latency_record[MAXLR]; int latencytop_enabled; +#ifdef CONFIG_SYSCTL +static int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + int err; + + err = proc_dointvec(table, write, buffer, lenp, ppos); + if (latencytop_enabled) + force_schedstat_enabled(); + + return err; +} + +static struct ctl_table latencytop_sysctl[] = { + { + .procname = "latencytop", + .data = &latencytop_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_latencytop, + }, + {} +}; +#endif + void clear_tsk_latency_tracing(struct task_struct *p) { unsigned long flags; @@ -266,18 +292,9 @@ static const struct proc_ops lstats_proc_ops = { static int __init init_lstats_procfs(void) { proc_create("latency_stats", 0644, NULL, &lstats_proc_ops); +#ifdef CONFIG_SYSCTL + register_sysctl_init("kernel", latencytop_sysctl); +#endif return 0; } - -int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - int err; - - err = proc_dointvec(table, write, buffer, lenp, ppos); - if (latencytop_enabled) - force_schedstat_enabled(); - - return err; -} device_initcall(init_lstats_procfs); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0fdf465a93d..b60345cbadf0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include "../lib/kstrtox.h" @@ -1623,15 +1622,6 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_NEG_ONE, .extra2 = SYSCTL_ONE, }, -#endif -#ifdef CONFIG_LATENCYTOP - { - .procname = "latencytop", - .data = &latencytop_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_latencytop, - }, #endif { .procname = "print-fatal-signals", From 3c6a4cba3138d1aeeb8fd917178c6578b9b8ae29 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Apr 2022 15:08:02 -0700 Subject: [PATCH 22/28] mm: fix unused variable kernel warning when SYSCTL=n When CONFIG_SYSCTL=n the variable dirty_bytes_min which is just used as a minimum to a proc handler is not used. So just move this under the ifdef for CONFIG_SYSCTL. Fixes: aa779e510219 ("mm: move page-writeback sysctls to their own file") Reported-by: kernel test robot Acked-by: Andrew Morton Signed-off-by: Luis Chamberlain --- mm/page-writeback.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 438762173a59..4f51d0ac5043 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -89,9 +89,6 @@ static int vm_highmem_is_dirtyable; */ static int vm_dirty_ratio = 20; -/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; - /* * vm_dirty_bytes starts at 0 (disabled) so that it is a function of * vm_dirty_ratio * the amount of dirtyable memory @@ -2077,6 +2074,10 @@ static int page_writeback_cpu_online(unsigned int cpu) } #ifdef CONFIG_SYSCTL + +/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; + static struct ctl_table vm_page_writeback_sysctls[] = { { .procname = "dirty_background_ratio", From cb55f27ac9326bfe3bcaaf0adb498778d64602b6 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Mon, 11 Apr 2022 13:12:05 +0800 Subject: [PATCH 23/28] fs/proc: Introduce list_for_each_table_entry for proc sysctl Use the list_for_each_table_entry macro to optimize the scenario of traverse ctl_table. This make the code neater and easier to understand. Suggested-by: Davidlohr Bueso Signed-off-by: Meng Tang [updated the sysctl_check_table() hunk due to some changes upstream] Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 86 ++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7d9cfc730bd4..8901fe0b7e2a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,9 @@ #include #include "internal.h" +#define list_for_each_table_entry(entry, table) \ + for ((entry) = (table); (entry)->procname; (entry)++) + static const struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; @@ -215,15 +218,19 @@ static void init_header(struct ctl_table_header *head, INIT_HLIST_HEAD(&head->inodes); if (node) { struct ctl_table *entry; - for (entry = table; entry->procname; entry++, node++) + + list_for_each_table_entry(entry, table) { node->header = head; + node++; + } } } static void erase_header(struct ctl_table_header *head) { struct ctl_table *entry; - for (entry = head->ctl_table; entry->procname; entry++) + + list_for_each_table_entry(entry, head->ctl_table) erase_entry(head, entry); } @@ -248,7 +255,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) err = insert_links(header); if (err) goto fail_links; - for (entry = header->ctl_table; entry->procname; entry++) { + list_for_each_table_entry(entry, header->ctl_table) { err = insert_entry(header, entry); if (err) goto fail; @@ -1130,35 +1137,36 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table) static int sysctl_check_table(const char *path, struct ctl_table *table) { + struct ctl_table *entry; int err = 0; - for (; table->procname; table++) { - if (table->child) - err |= sysctl_err(path, table, "Not a file"); + list_for_each_table_entry(entry, table) { + if (entry->child) + err |= sysctl_err(path, entry, "Not a file"); - if ((table->proc_handler == proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_douintvec) || - (table->proc_handler == proc_douintvec_minmax) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dou8vec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - err |= sysctl_err(path, table, "No data"); - if (!table->maxlen) - err |= sysctl_err(path, table, "No maxlen"); + if ((entry->proc_handler == proc_dostring) || + (entry->proc_handler == proc_dointvec) || + (entry->proc_handler == proc_douintvec) || + (entry->proc_handler == proc_douintvec_minmax) || + (entry->proc_handler == proc_dointvec_minmax) || + (entry->proc_handler == proc_dou8vec_minmax) || + (entry->proc_handler == proc_dointvec_jiffies) || + (entry->proc_handler == proc_dointvec_userhz_jiffies) || + (entry->proc_handler == proc_dointvec_ms_jiffies) || + (entry->proc_handler == proc_doulongvec_minmax) || + (entry->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!entry->data) + err |= sysctl_err(path, entry, "No data"); + if (!entry->maxlen) + err |= sysctl_err(path, entry, "No maxlen"); else - err |= sysctl_check_table_array(path, table); + err |= sysctl_check_table_array(path, entry); } - if (!table->proc_handler) - err |= sysctl_err(path, table, "No proc_handler"); + if (!entry->proc_handler) + err |= sysctl_err(path, entry, "No proc_handler"); - if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) - err |= sysctl_err(path, table, "bogus .mode 0%o", - table->mode); + if ((entry->mode & (S_IRUGO|S_IWUGO)) != entry->mode) + err |= sysctl_err(path, entry, "bogus .mode 0%o", + entry->mode); } return err; } @@ -1174,7 +1182,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table name_bytes = 0; nr_entries = 0; - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { nr_entries++; name_bytes += strlen(entry->procname) + 1; } @@ -1191,14 +1199,16 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table node = (struct ctl_node *)(links + 1); link_table = (struct ctl_table *)(node + nr_entries); link_name = (char *)&link_table[nr_entries + 1]; + link = link_table; - for (link = link_table, entry = table; entry->procname; link++, entry++) { + list_for_each_table_entry(entry, table) { int len = strlen(entry->procname) + 1; memcpy(link_name, entry->procname, len); link->procname = link_name; link->mode = S_IFLNK|S_IRWXUGO; link->data = link_root; link_name += len; + link++; } init_header(links, dir->header.root, dir->header.set, node, link_table); links->nreg = nr_entries; @@ -1213,7 +1223,7 @@ static bool get_links(struct ctl_dir *dir, struct ctl_table *entry, *link; /* Are there links available for every entry in table? */ - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { const char *procname = entry->procname; link = find_entry(&head, dir, procname, strlen(procname)); if (!link) @@ -1226,7 +1236,7 @@ static bool get_links(struct ctl_dir *dir, } /* The checks passed. Increase the registration count on the links */ - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { const char *procname = entry->procname; link = find_entry(&head, dir, procname, strlen(procname)); head->nreg++; @@ -1329,7 +1339,7 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_node *node; int nr_entries = 0; - for (entry = table; entry->procname; entry++) + list_for_each_table_entry(entry, table) nr_entries++; header = kzalloc(sizeof(struct ctl_table_header) + @@ -1456,7 +1466,7 @@ static int count_subheaders(struct ctl_table *table) if (!table || !table->procname) return 1; - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { if (entry->child) nr_subheaders += count_subheaders(entry->child); else @@ -1475,7 +1485,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, int nr_dirs = 0; int err = -ENOMEM; - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { if (entry->child) nr_dirs++; else @@ -1492,7 +1502,9 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, goto out; ctl_table_arg = files; - for (new = files, entry = table; entry->procname; entry++) { + new = files; + + list_for_each_table_entry(entry, table) { if (entry->child) continue; *new = *entry; @@ -1516,7 +1528,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, } /* Recurse into the subdirectories. */ - for (entry = table; entry->procname; entry++) { + list_for_each_table_entry(entry, table) { char *child_pos; if (!entry->child) @@ -1670,7 +1682,7 @@ static void put_links(struct ctl_table_header *header) if (IS_ERR(core_parent)) return; - for (entry = header->ctl_table; entry->procname; entry++) { + list_for_each_table_entry(entry, header->ctl_table) { struct ctl_table_header *link_head; struct ctl_table *link; const char *name = entry->procname; From 8fd7c2144d1292f15c901211750dee021ed5079a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 21 Apr 2022 11:38:43 -0700 Subject: [PATCH 24/28] ftrace: fix building with SYSCTL=y but DYNAMIC_FTRACE=n Ok so hopefully this is the last of it. 0day picked up a build failure [0] when SYSCTL=y but DYNAMIC_FTRACE=n. This can be fixed by just declaring an empty routine for the calls moved just recently. [0] https://lkml.kernel.org/r/202204161203.6dSlgKJX-lkp@intel.com Reported-by: kernel test robot Fixes: f8b7d2b4c192 ("ftrace: fix building with SYSCTL=n but DYNAMIC_FTRACE=y") Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- kernel/trace/ftrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d9424fd9a183..1f89039f0feb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7873,6 +7873,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops) EXPORT_SYMBOL_GPL(unregister_ftrace_function); #ifdef CONFIG_SYSCTL + +#ifdef CONFIG_DYNAMIC_FTRACE static void ftrace_startup_sysctl(void) { int command; @@ -7906,6 +7908,10 @@ static void ftrace_shutdown_sysctl(void) ftrace_run_update_code(command); } } +#else +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) +#endif /* CONFIG_DYNAMIC_FTRACE */ static bool is_permanent_ops_registered(void) { From acd0b04ecc795e97b7878dccc5cb4d3d627a4c27 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 24 Apr 2022 21:28:54 +0300 Subject: [PATCH 25/28] sysctl: minor cleanup in new_dir() Byte zeroing is not required here, since memory was allocated by kzalloc() Signed-off-by: Vasily Averin Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8901fe0b7e2a..f5b12adee69d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -985,7 +985,6 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 2); memcpy(new_name, name, namelen); - new_name[namelen] = '\0'; table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, node, table); From a467257ffe4bdb13eacddec0137013f6a1140b81 Mon Sep 17 00:00:00 2001 From: yingelin Date: Sun, 24 Apr 2022 10:57:40 +0800 Subject: [PATCH 26/28] kernel/kexec_core: move kexec_core sysctls into its own file This move the kernel/kexec_core.c respective sysctls to its own file. kernel/sysctl.c has grown to an insane mess, We move sysctls to places where features actually belong to improve the readability and reduce merge conflicts. At the same time, the proc-sysctl maintainers can easily care about the core logic other than the sysctl knobs added for some feature. We already moved all filesystem sysctls out. This patch is part of the effort to move kexec related sysctls out. Signed-off-by: yingelin Acked-by: Baoquan He Signed-off-by: Luis Chamberlain --- kernel/kexec_core.c | 22 ++++++++++++++++++++++ kernel/sysctl.c | 13 ------------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 68480f731192..a0456baf52cc 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -936,6 +936,28 @@ int kimage_load_segment(struct kimage *image, struct kimage *kexec_image; struct kimage *kexec_crash_image; int kexec_load_disabled; +#ifdef CONFIG_SYSCTL +static struct ctl_table kexec_core_sysctls[] = { + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static int __init kexec_core_sysctl_init(void) +{ + register_sysctl_init("kernel", kexec_core_sysctls); + return 0; +} +late_initcall(kexec_core_sysctl_init); +#endif /* * No panic_cpu check version of crash_kexec(). This function is called diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b60345cbadf0..0f3cb61a2e39 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -1712,18 +1711,6 @@ static struct ctl_table kern_table[] = { .proc_handler = tracepoint_printk_sysctl, }, #endif -#ifdef CONFIG_KEXEC_CORE - { - .procname = "kexec_load_disabled", - .data = &kexec_load_disabled, - .maxlen = sizeof(int), - .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_MODULES { .procname = "modprobe", From 764aaf44cd64dd1f760268ee0b22d2dc53cd5bc0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 27 Apr 2022 20:54:01 +0800 Subject: [PATCH 27/28] reboot: Fix build warning without CONFIG_SYSCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_SYSCTL is n, build warn: kernel/reboot.c:443:20: error: ‘kernel_reboot_sysctls_init’ defined but not used [-Werror=unused-function] static void __init kernel_reboot_sysctls_init(void) ^~~~~~~~~~~~~~~~~~~~~~~~~~ Move kernel_reboot_sysctls_init() to #ifdef block to fix this. Fixes: 06d177662fb8 ("kernel/reboot: move reboot sysctls to its own file") Signed-off-by: YueHaibing Signed-off-by: Luis Chamberlain --- kernel/reboot.c | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index ed4e6dfb7d44..ecbf09ea03c5 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -421,33 +421,6 @@ void ctrl_alt_del(void) static char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; static const char reboot_cmd[] = "/sbin/reboot"; -#ifdef CONFIG_SYSCTL -static struct ctl_table kern_reboot_table[] = { - { - .procname = "poweroff_cmd", - .data = &poweroff_cmd, - .maxlen = POWEROFF_CMD_PATH_LEN, - .mode = 0644, - .proc_handler = proc_dostring, - }, - { - .procname = "ctrl-alt-del", - .data = &C_A_D, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; - -static void __init kernel_reboot_sysctls_init(void) -{ - register_sysctl_init("kernel", kern_reboot_table); -} -#else -#define kernel_reboot_sysctls_init() do { } while (0) -#endif /* CONFIG_SYSCTL */ - static int run_cmd(const char *cmd) { char **argv; @@ -895,6 +868,33 @@ static struct attribute *reboot_attrs[] = { NULL, }; +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_reboot_table[] = { + { + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = proc_dostring, + }, + { + .procname = "ctrl-alt-del", + .data = &C_A_D, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static void __init kernel_reboot_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_reboot_table); +} +#else +#define kernel_reboot_sysctls_init() do { } while (0) +#endif /* CONFIG_SYSCTL */ + static const struct attribute_group reboot_attr_group = { .attrs = reboot_attrs, }; From 494dcdf46e5cdee926c9f441d37e3ea1db57d1da Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 27 Apr 2022 21:10:02 +0800 Subject: [PATCH 28/28] sched: Fix build warning without CONFIG_SYSCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IF CONFIG_SYSCTL is n, build warn: kernel/sched/core.c:1782:12: warning: ‘sysctl_sched_uclamp_handler’ defined but not used [-Wunused-function] static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, ^~~~~~~~~~~~~~~~~~~~~~~~~~~ sysctl_sched_uclamp_handler() is used while CONFIG_SYSCTL enabled, wrap all related code with CONFIG_SYSCTL to fix this. Fixes: 3267e0156c33 ("sched: Move uclamp_util sysctls to core.c") Signed-off-by: YueHaibing Signed-off-by: Luis Chamberlain --- kernel/sched/core.c | 65 +++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ef31751c5799..b8d2ff09b853 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1306,10 +1306,10 @@ static void set_load_weight(struct task_struct *p, bool update_load) static DEFINE_MUTEX(uclamp_mutex); /* Max allowed minimum utilization */ -static unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; +static unsigned int __maybe_unused sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; /* Max allowed maximum utilization */ -static unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; +static unsigned int __maybe_unused sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; /* * By default RT tasks run at the maximum performance point/capacity of the @@ -1456,33 +1456,6 @@ static void uclamp_update_util_min_rt_default(struct task_struct *p) task_rq_unlock(rq, p, &rf); } -static void uclamp_sync_util_min_rt_default(void) -{ - struct task_struct *g, *p; - - /* - * copy_process() sysctl_uclamp - * uclamp_min_rt = X; - * write_lock(&tasklist_lock) read_lock(&tasklist_lock) - * // link thread smp_mb__after_spinlock() - * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock); - * sched_post_fork() for_each_process_thread() - * __uclamp_sync_rt() __uclamp_sync_rt() - * - * Ensures that either sched_post_fork() will observe the new - * uclamp_min_rt or for_each_process_thread() will observe the new - * task. - */ - read_lock(&tasklist_lock); - smp_mb__after_spinlock(); - read_unlock(&tasklist_lock); - - rcu_read_lock(); - for_each_process_thread(g, p) - uclamp_update_util_min_rt_default(p); - rcu_read_unlock(); -} - static inline struct uclamp_se uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id) { @@ -1762,6 +1735,11 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css) } static void cpu_util_update_eff(struct cgroup_subsys_state *css); +#endif + +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_UCLAMP_TASK +#ifdef CONFIG_UCLAMP_TASK_GROUP static void uclamp_update_root_tg(void) { struct task_group *tg = &root_task_group; @@ -1779,6 +1757,33 @@ static void uclamp_update_root_tg(void) static void uclamp_update_root_tg(void) { } #endif +static void uclamp_sync_util_min_rt_default(void) +{ + struct task_struct *g, *p; + + /* + * copy_process() sysctl_uclamp + * uclamp_min_rt = X; + * write_lock(&tasklist_lock) read_lock(&tasklist_lock) + * // link thread smp_mb__after_spinlock() + * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock); + * sched_post_fork() for_each_process_thread() + * __uclamp_sync_rt() __uclamp_sync_rt() + * + * Ensures that either sched_post_fork() will observe the new + * uclamp_min_rt or for_each_process_thread() will observe the new + * task. + */ + read_lock(&tasklist_lock); + smp_mb__after_spinlock(); + read_unlock(&tasklist_lock); + + rcu_read_lock(); + for_each_process_thread(g, p) + uclamp_update_util_min_rt_default(p); + rcu_read_unlock(); +} + static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -1843,6 +1848,8 @@ done: return result; } +#endif +#endif static int uclamp_validate(struct task_struct *p, const struct sched_attr *attr)