sysctl: Improve the sysctl sanity checks
- Stop validating subdirectories now that we only register leaf tables - Cleanup and improve the duplicate filename check. * Run the duplicate filename check under the sysctl_lock to guarantee we never add duplicate names. * Reduce the duplicate filename check to nearly O(M*N) where M is the number of entries in tthe table we are registering and N is the number of entries in the directory before we got there. - Move the duplicate filename check into it's own function and call it directtly from __register_sysctl_table - Kill the config option as the sanity checks are now cheap enough the config option is unnecessary. The original reason for the config option was because we had a huge table used to verify the proc filename to binary sysctl mapping. That table has now evolved into the binary_sysctl translation layer and is no longer part of the sysctl_check code. - Tighten up the permission checks. Guarnateeing that files only have read or write permissions. - Removed redudant check for parents having a procname as now everything has a procname. - Generalize the backtrace logic so that we print a backtrace from any failure of __register_sysctl_table that was not caused by a memmory allocation failure. The backtrace allows us to track down who erroneously registered a sysctl table. Bechmark before (CONFIG_SYSCTL_CHECK=y): make-dummies 0 999 -> 12s rmmod dummy -> 0.08s Bechmark before (CONFIG_SYSCTL_CHECK=n): make-dummies 0 999 -> 0.7s rmmod dummy -> 0.06s make-dummies 0 99999 -> 1m13s rmmod dummy -> 0.38s Benchmark after: make-dummies 0 999 -> 0.65s rmmod dummy -> 0.055s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.39s The sysctl sanity checks now impose no measurable cost. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
This commit is contained in:
parent
f728019bb7
commit
7c60c48f58
|
@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
|
static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
|
||||||
static int sysctl_depth(struct ctl_table *table)
|
struct ctl_table *table)
|
||||||
{
|
|
||||||
struct ctl_table *tmp;
|
|
||||||
int depth;
|
|
||||||
|
|
||||||
depth = 0;
|
|
||||||
for (tmp = table; tmp->parent; tmp = tmp->parent)
|
|
||||||
depth++;
|
|
||||||
|
|
||||||
return depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; table && i < n; i++)
|
|
||||||
table = table->parent;
|
|
||||||
|
|
||||||
return table;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void sysctl_print_path(struct ctl_table *table)
|
|
||||||
{
|
|
||||||
struct ctl_table *tmp;
|
|
||||||
int depth, i;
|
|
||||||
depth = sysctl_depth(table);
|
|
||||||
if (table->procname) {
|
|
||||||
for (i = depth; i >= 0; i--) {
|
|
||||||
tmp = sysctl_parent(table, i);
|
|
||||||
printk("/%s", tmp->procname?tmp->procname:"");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printk(" ");
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
|
|
||||||
struct ctl_table *table)
|
|
||||||
{
|
|
||||||
struct ctl_table_header *head;
|
|
||||||
struct ctl_table *ref, *test;
|
|
||||||
int depth, cur_depth;
|
|
||||||
|
|
||||||
depth = sysctl_depth(table);
|
|
||||||
|
|
||||||
for (head = __sysctl_head_next(namespaces, NULL); head;
|
|
||||||
head = __sysctl_head_next(namespaces, head)) {
|
|
||||||
cur_depth = depth;
|
|
||||||
ref = head->ctl_table;
|
|
||||||
repeat:
|
|
||||||
test = sysctl_parent(table, cur_depth);
|
|
||||||
for (; ref->procname; ref++) {
|
|
||||||
int match = 0;
|
|
||||||
if (cur_depth && !ref->child)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (test->procname && ref->procname &&
|
|
||||||
(strcmp(test->procname, ref->procname) == 0))
|
|
||||||
match++;
|
|
||||||
|
|
||||||
if (match) {
|
|
||||||
if (cur_depth != 0) {
|
|
||||||
cur_depth--;
|
|
||||||
ref = ref->child;
|
|
||||||
goto repeat;
|
|
||||||
}
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ref = NULL;
|
|
||||||
out:
|
|
||||||
sysctl_head_finish(head);
|
|
||||||
return ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void set_fail(const char **fail, struct ctl_table *table, const char *str)
|
|
||||||
{
|
|
||||||
if (*fail) {
|
|
||||||
printk(KERN_ERR "sysctl table check failed: ");
|
|
||||||
sysctl_print_path(table);
|
|
||||||
printk(" %s\n", *fail);
|
|
||||||
dump_stack();
|
|
||||||
}
|
|
||||||
*fail = str;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void sysctl_check_leaf(struct nsproxy *namespaces,
|
|
||||||
struct ctl_table *table, const char **fail)
|
|
||||||
{
|
|
||||||
struct ctl_table *ref;
|
|
||||||
|
|
||||||
ref = sysctl_check_lookup(namespaces, table);
|
|
||||||
if (ref && (ref != table))
|
|
||||||
set_fail(fail, table, "Sysctl already exists");
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
|
|
||||||
{
|
{
|
||||||
|
struct ctl_table *entry, *test;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
for (; table->procname; table++) {
|
|
||||||
const char *fail = NULL;
|
|
||||||
|
|
||||||
if (table->parent) {
|
for (entry = old; entry->procname; entry++) {
|
||||||
if (!table->parent->procname)
|
for (test = table; test->procname; test++) {
|
||||||
set_fail(&fail, table, "Parent without procname");
|
if (strcmp(entry->procname, test->procname) == 0) {
|
||||||
}
|
printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
|
||||||
if (table->child) {
|
path, test->procname);
|
||||||
if (table->data)
|
error = -EEXIST;
|
||||||
set_fail(&fail, table, "Directory with data?");
|
|
||||||
if (table->maxlen)
|
|
||||||
set_fail(&fail, table, "Directory with maxlen?");
|
|
||||||
if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
|
|
||||||
set_fail(&fail, table, "Writable sysctl directory");
|
|
||||||
if (table->proc_handler)
|
|
||||||
set_fail(&fail, table, "Directory with proc_handler");
|
|
||||||
if (table->extra1)
|
|
||||||
set_fail(&fail, table, "Directory with extra1");
|
|
||||||
if (table->extra2)
|
|
||||||
set_fail(&fail, table, "Directory with extra2");
|
|
||||||
} else {
|
|
||||||
if ((table->proc_handler == proc_dostring) ||
|
|
||||||
(table->proc_handler == proc_dointvec) ||
|
|
||||||
(table->proc_handler == proc_dointvec_minmax) ||
|
|
||||||
(table->proc_handler == proc_dointvec_jiffies) ||
|
|
||||||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
|
|
||||||
(table->proc_handler == proc_dointvec_ms_jiffies) ||
|
|
||||||
(table->proc_handler == proc_doulongvec_minmax) ||
|
|
||||||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
|
|
||||||
if (!table->data)
|
|
||||||
set_fail(&fail, table, "No data");
|
|
||||||
if (!table->maxlen)
|
|
||||||
set_fail(&fail, table, "No maxlen");
|
|
||||||
}
|
}
|
||||||
#ifdef CONFIG_PROC_SYSCTL
|
|
||||||
if (!table->proc_handler)
|
|
||||||
set_fail(&fail, table, "No proc_handler");
|
|
||||||
#endif
|
|
||||||
sysctl_check_leaf(namespaces, table, &fail);
|
|
||||||
}
|
}
|
||||||
if (table->mode > 0777)
|
|
||||||
set_fail(&fail, table, "bogus .mode");
|
|
||||||
if (fail) {
|
|
||||||
set_fail(&fail, table, NULL);
|
|
||||||
error = -EINVAL;
|
|
||||||
}
|
|
||||||
if (table->child)
|
|
||||||
error |= sysctl_check_table(namespaces, table->child);
|
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */
|
|
||||||
|
static int sysctl_check_dups(struct nsproxy *namespaces,
|
||||||
|
struct ctl_table_header *header,
|
||||||
|
const char *path, struct ctl_table *table)
|
||||||
|
{
|
||||||
|
struct ctl_table_root *root;
|
||||||
|
struct ctl_table_set *set;
|
||||||
|
struct ctl_table_header *dir_head, *head;
|
||||||
|
struct ctl_table *dir_table;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
|
/* No dups if we are the only member of our directory */
|
||||||
|
if (header->attached_by != table)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
dir_head = header->parent;
|
||||||
|
dir_table = header->attached_to;
|
||||||
|
|
||||||
|
error = sysctl_check_table_dups(path, dir_table, table);
|
||||||
|
|
||||||
|
root = &sysctl_table_root;
|
||||||
|
do {
|
||||||
|
set = lookup_header_set(root, namespaces);
|
||||||
|
|
||||||
|
list_for_each_entry(head, &set->list, ctl_entry) {
|
||||||
|
if (head->unregistering)
|
||||||
|
continue;
|
||||||
|
if (head->attached_to != dir_table)
|
||||||
|
continue;
|
||||||
|
error = sysctl_check_table_dups(path, head->attached_by,
|
||||||
|
table);
|
||||||
|
}
|
||||||
|
root = list_entry(root->root_list.next,
|
||||||
|
struct ctl_table_root, root_list);
|
||||||
|
} while (root != &sysctl_table_root);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
|
||||||
|
{
|
||||||
|
struct va_format vaf;
|
||||||
|
va_list args;
|
||||||
|
|
||||||
|
va_start(args, fmt);
|
||||||
|
vaf.fmt = fmt;
|
||||||
|
vaf.va = &args;
|
||||||
|
|
||||||
|
printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
|
||||||
|
path, table->procname, &vaf);
|
||||||
|
|
||||||
|
va_end(args);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sysctl_check_table(const char *path, struct ctl_table *table)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
for (; table->procname; table++) {
|
||||||
|
if (table->child)
|
||||||
|
err = sysctl_err(path, table, "Not a file");
|
||||||
|
|
||||||
|
if ((table->proc_handler == proc_dostring) ||
|
||||||
|
(table->proc_handler == proc_dointvec) ||
|
||||||
|
(table->proc_handler == proc_dointvec_minmax) ||
|
||||||
|
(table->proc_handler == proc_dointvec_jiffies) ||
|
||||||
|
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
|
||||||
|
(table->proc_handler == proc_dointvec_ms_jiffies) ||
|
||||||
|
(table->proc_handler == proc_doulongvec_minmax) ||
|
||||||
|
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
|
||||||
|
if (!table->data)
|
||||||
|
err = sysctl_err(path, table, "No data");
|
||||||
|
if (!table->maxlen)
|
||||||
|
err = sysctl_err(path, table, "No maxlen");
|
||||||
|
}
|
||||||
|
if (!table->proc_handler)
|
||||||
|
err = sysctl_err(path, table, "No proc_handler");
|
||||||
|
|
||||||
|
if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
|
||||||
|
err = sysctl_err(path, table, "bogus .mode 0%o",
|
||||||
|
table->mode);
|
||||||
|
}
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* __register_sysctl_table - register a leaf sysctl table
|
* __register_sysctl_table - register a leaf sysctl table
|
||||||
|
@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
|
||||||
header->root = root;
|
header->root = root;
|
||||||
sysctl_set_parent(NULL, header->ctl_table);
|
sysctl_set_parent(NULL, header->ctl_table);
|
||||||
header->count = 1;
|
header->count = 1;
|
||||||
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
|
if (sysctl_check_table(path, table))
|
||||||
if (sysctl_check_table(namespaces, header->ctl_table)) {
|
goto fail;
|
||||||
kfree(header);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
spin_lock(&sysctl_lock);
|
spin_lock(&sysctl_lock);
|
||||||
header->set = lookup_header_set(root, namespaces);
|
header->set = lookup_header_set(root, namespaces);
|
||||||
header->attached_by = header->ctl_table;
|
header->attached_by = header->ctl_table;
|
||||||
|
@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
|
||||||
struct ctl_table_root, root_list);
|
struct ctl_table_root, root_list);
|
||||||
set = lookup_header_set(root, namespaces);
|
set = lookup_header_set(root, namespaces);
|
||||||
}
|
}
|
||||||
|
if (sysctl_check_dups(namespaces, header, path, table))
|
||||||
|
goto fail_locked;
|
||||||
header->parent->count++;
|
header->parent->count++;
|
||||||
list_add_tail(&header->ctl_entry, &header->set->list);
|
list_add_tail(&header->ctl_entry, &header->set->list);
|
||||||
spin_unlock(&sysctl_lock);
|
spin_unlock(&sysctl_lock);
|
||||||
|
|
||||||
return header;
|
return header;
|
||||||
|
fail_locked:
|
||||||
|
spin_unlock(&sysctl_lock);
|
||||||
|
fail:
|
||||||
|
kfree(header);
|
||||||
|
dump_stack();
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *append_path(const char *path, char *pos, const char *name)
|
static char *append_path(const char *path, char *pos, const char *name)
|
||||||
|
|
|
@ -1113,14 +1113,6 @@ config LATENCYTOP
|
||||||
Enable this option if you want to use the LatencyTOP tool
|
Enable this option if you want to use the LatencyTOP tool
|
||||||
to find out which userspace is blocking on what kernel operations.
|
to find out which userspace is blocking on what kernel operations.
|
||||||
|
|
||||||
config SYSCTL_SYSCALL_CHECK
|
|
||||||
bool "Sysctl checks"
|
|
||||||
depends on SYSCTL
|
|
||||||
---help---
|
|
||||||
sys_sysctl uses binary paths that have been found challenging
|
|
||||||
to properly maintain and use. This enables checks that help
|
|
||||||
you to keep things correct.
|
|
||||||
|
|
||||||
source mm/Kconfig.debug
|
source mm/Kconfig.debug
|
||||||
source kernel/trace/Kconfig
|
source kernel/trace/Kconfig
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue