sysctl: Improve the sysctl sanity checks

- Stop validating subdirectories now that we only register leaf tables

- Cleanup and improve the duplicate filename check.
  * Run the duplicate filename check under the sysctl_lock to guarantee
    we never add duplicate names.
  * Reduce the duplicate filename check to nearly O(M*N) where M is the
    number of entries in tthe table we are registering and N is the
    number of entries in the directory before we got there.

- Move the duplicate filename check into it's own function and call
  it directtly from __register_sysctl_table

- Kill the config option as the sanity checks are now cheap enough
  the config option is unnecessary. The original reason for the config
  option was because we had a huge table used to verify the proc filename
  to binary sysctl mapping.  That table has now evolved into the binary_sysctl
  translation layer and is no longer part of the sysctl_check code.

- Tighten up the permission checks.  Guarnateeing that files only have read
  or write permissions.

- Removed redudant check for parents having a procname as now everything has
  a procname.

- Generalize the backtrace logic so that we print a backtrace from
  any failure of __register_sysctl_table that was not caused by
  a memmory allocation failure.  The backtrace allows us to track
  down who erroneously registered a sysctl table.

Bechmark before (CONFIG_SYSCTL_CHECK=y):
    make-dummies 0 999 -> 12s
    rmmod dummy        -> 0.08s

Bechmark before (CONFIG_SYSCTL_CHECK=n):
    make-dummies 0 999 -> 0.7s
    rmmod dummy        -> 0.06s
    make-dummies 0 99999 -> 1m13s
    rmmod dummy          -> 0.38s

Benchmark after:
    make-dummies 0 999 -> 0.65s
    rmmod dummy        -> 0.055s
    make-dummies 0 9999 -> 1m10s
    rmmod dummy         -> 0.39s

The sysctl sanity checks now impose no measurable cost.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
This commit is contained in:
Eric W. Biederman 2012-01-21 13:34:05 -08:00
parent f728019bb7
commit 7c60c48f58
2 changed files with 102 additions and 160 deletions

View File

@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
} }
} }
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
static int sysctl_depth(struct ctl_table *table) struct ctl_table *table)
{
struct ctl_table *tmp;
int depth;
depth = 0;
for (tmp = table; tmp->parent; tmp = tmp->parent)
depth++;
return depth;
}
static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
{
int i;
for (i = 0; table && i < n; i++)
table = table->parent;
return table;
}
static void sysctl_print_path(struct ctl_table *table)
{
struct ctl_table *tmp;
int depth, i;
depth = sysctl_depth(table);
if (table->procname) {
for (i = depth; i >= 0; i--) {
tmp = sysctl_parent(table, i);
printk("/%s", tmp->procname?tmp->procname:"");
}
}
printk(" ");
}
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
struct ctl_table *table)
{
struct ctl_table_header *head;
struct ctl_table *ref, *test;
int depth, cur_depth;
depth = sysctl_depth(table);
for (head = __sysctl_head_next(namespaces, NULL); head;
head = __sysctl_head_next(namespaces, head)) {
cur_depth = depth;
ref = head->ctl_table;
repeat:
test = sysctl_parent(table, cur_depth);
for (; ref->procname; ref++) {
int match = 0;
if (cur_depth && !ref->child)
continue;
if (test->procname && ref->procname &&
(strcmp(test->procname, ref->procname) == 0))
match++;
if (match) {
if (cur_depth != 0) {
cur_depth--;
ref = ref->child;
goto repeat;
}
goto out;
}
}
}
ref = NULL;
out:
sysctl_head_finish(head);
return ref;
}
static void set_fail(const char **fail, struct ctl_table *table, const char *str)
{
if (*fail) {
printk(KERN_ERR "sysctl table check failed: ");
sysctl_print_path(table);
printk(" %s\n", *fail);
dump_stack();
}
*fail = str;
}
static void sysctl_check_leaf(struct nsproxy *namespaces,
struct ctl_table *table, const char **fail)
{
struct ctl_table *ref;
ref = sysctl_check_lookup(namespaces, table);
if (ref && (ref != table))
set_fail(fail, table, "Sysctl already exists");
}
static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
{ {
struct ctl_table *entry, *test;
int error = 0; int error = 0;
for (; table->procname; table++) {
const char *fail = NULL;
if (table->parent) { for (entry = old; entry->procname; entry++) {
if (!table->parent->procname) for (test = table; test->procname; test++) {
set_fail(&fail, table, "Parent without procname"); if (strcmp(entry->procname, test->procname) == 0) {
} printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
if (table->child) { path, test->procname);
if (table->data) error = -EEXIST;
set_fail(&fail, table, "Directory with data?");
if (table->maxlen)
set_fail(&fail, table, "Directory with maxlen?");
if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
set_fail(&fail, table, "Writable sysctl directory");
if (table->proc_handler)
set_fail(&fail, table, "Directory with proc_handler");
if (table->extra1)
set_fail(&fail, table, "Directory with extra1");
if (table->extra2)
set_fail(&fail, table, "Directory with extra2");
} else {
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
(table->proc_handler == proc_dointvec_ms_jiffies) ||
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
set_fail(&fail, table, "No data");
if (!table->maxlen)
set_fail(&fail, table, "No maxlen");
} }
#ifdef CONFIG_PROC_SYSCTL
if (!table->proc_handler)
set_fail(&fail, table, "No proc_handler");
#endif
sysctl_check_leaf(namespaces, table, &fail);
} }
if (table->mode > 0777)
set_fail(&fail, table, "bogus .mode");
if (fail) {
set_fail(&fail, table, NULL);
error = -EINVAL;
}
if (table->child)
error |= sysctl_check_table(namespaces, table->child);
} }
return error; return error;
} }
#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */
static int sysctl_check_dups(struct nsproxy *namespaces,
struct ctl_table_header *header,
const char *path, struct ctl_table *table)
{
struct ctl_table_root *root;
struct ctl_table_set *set;
struct ctl_table_header *dir_head, *head;
struct ctl_table *dir_table;
int error = 0;
/* No dups if we are the only member of our directory */
if (header->attached_by != table)
return 0;
dir_head = header->parent;
dir_table = header->attached_to;
error = sysctl_check_table_dups(path, dir_table, table);
root = &sysctl_table_root;
do {
set = lookup_header_set(root, namespaces);
list_for_each_entry(head, &set->list, ctl_entry) {
if (head->unregistering)
continue;
if (head->attached_to != dir_table)
continue;
error = sysctl_check_table_dups(path, head->attached_by,
table);
}
root = list_entry(root->root_list.next,
struct ctl_table_root, root_list);
} while (root != &sysctl_table_root);
return error;
}
static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
path, table->procname, &vaf);
va_end(args);
return -EINVAL;
}
static int sysctl_check_table(const char *path, struct ctl_table *table)
{
int err = 0;
for (; table->procname; table++) {
if (table->child)
err = sysctl_err(path, table, "Not a file");
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
(table->proc_handler == proc_dointvec_ms_jiffies) ||
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
err = sysctl_err(path, table, "No data");
if (!table->maxlen)
err = sysctl_err(path, table, "No maxlen");
}
if (!table->proc_handler)
err = sysctl_err(path, table, "No proc_handler");
if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
err = sysctl_err(path, table, "bogus .mode 0%o",
table->mode);
}
return err;
}
/** /**
* __register_sysctl_table - register a leaf sysctl table * __register_sysctl_table - register a leaf sysctl table
@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
header->root = root; header->root = root;
sysctl_set_parent(NULL, header->ctl_table); sysctl_set_parent(NULL, header->ctl_table);
header->count = 1; header->count = 1;
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK if (sysctl_check_table(path, table))
if (sysctl_check_table(namespaces, header->ctl_table)) { goto fail;
kfree(header);
return NULL;
}
#endif
spin_lock(&sysctl_lock); spin_lock(&sysctl_lock);
header->set = lookup_header_set(root, namespaces); header->set = lookup_header_set(root, namespaces);
header->attached_by = header->ctl_table; header->attached_by = header->ctl_table;
@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
struct ctl_table_root, root_list); struct ctl_table_root, root_list);
set = lookup_header_set(root, namespaces); set = lookup_header_set(root, namespaces);
} }
if (sysctl_check_dups(namespaces, header, path, table))
goto fail_locked;
header->parent->count++; header->parent->count++;
list_add_tail(&header->ctl_entry, &header->set->list); list_add_tail(&header->ctl_entry, &header->set->list);
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
return header; return header;
fail_locked:
spin_unlock(&sysctl_lock);
fail:
kfree(header);
dump_stack();
return NULL;
} }
static char *append_path(const char *path, char *pos, const char *name) static char *append_path(const char *path, char *pos, const char *name)

View File

@ -1113,14 +1113,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations. to find out which userspace is blocking on what kernel operations.
config SYSCTL_SYSCALL_CHECK
bool "Sysctl checks"
depends on SYSCTL
---help---
sys_sysctl uses binary paths that have been found challenging
to properly maintain and use. This enables checks that help
you to keep things correct.
source mm/Kconfig.debug source mm/Kconfig.debug
source kernel/trace/Kconfig source kernel/trace/Kconfig