OpenCloudOS-Kernel/security/keys/keyring.c

1280 lines
32 KiB
C
Raw Normal View History

/* Keyring handling
*
* Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/err.h>
#include <keys/keyring-type.h>
#include <linux/uaccess.h>
#include "internal.h"
#define rcu_dereference_locked_keyring(keyring) \
(rcu_dereference_protected( \
(keyring)->payload.subscriptions, \
rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
#define rcu_deref_link_locked(klist, index, keyring) \
(rcu_dereference_protected( \
(klist)->keys[index], \
rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
#define MAX_KEYRING_LINKS \
min_t(size_t, USHRT_MAX - 1, \
((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
#define KEY_LINK_FIXQUOTA 1UL
/*
* When plumbing the depths of the key tree, this sets a hard limit
* set on how deep we're willing to go.
*/
#define KEYRING_SEARCH_MAX_DEPTH 6
/*
* We keep all named keyrings in a hash to speed looking them up.
*/
#define KEYRING_NAME_HASH_SIZE (1 << 5)
static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE];
static DEFINE_RWLOCK(keyring_name_lock);
static inline unsigned keyring_hash(const char *desc)
{
unsigned bucket = 0;
for (; *desc; desc++)
bucket += (unsigned char)*desc;
return bucket & (KEYRING_NAME_HASH_SIZE - 1);
}
/*
* The keyring key type definition. Keyrings are simply keys of this type and
* can be treated as ordinary keys in addition to having their own special
* operations.
*/
static int keyring_instantiate(struct key *keyring,
const void *data, size_t datalen);
static int keyring_match(const struct key *keyring, const void *criterion);
static void keyring_revoke(struct key *keyring);
static void keyring_destroy(struct key *keyring);
static void keyring_describe(const struct key *keyring, struct seq_file *m);
static long keyring_read(const struct key *keyring,
char __user *buffer, size_t buflen);
struct key_type key_type_keyring = {
.name = "keyring",
.def_datalen = sizeof(struct keyring_list),
.instantiate = keyring_instantiate,
.match = keyring_match,
.revoke = keyring_revoke,
.destroy = keyring_destroy,
.describe = keyring_describe,
.read = keyring_read,
};
EXPORT_SYMBOL(key_type_keyring);
/*
* Semaphore to serialise link/link calls to prevent two link calls in parallel
* introducing a cycle.
*/
static DECLARE_RWSEM(keyring_serialise_link_sem);
/*
* Publish the name of a keyring so that it can be found by name (if it has
* one).
*/
static void keyring_publish_name(struct key *keyring)
{
int bucket;
if (keyring->description) {
bucket = keyring_hash(keyring->description);
write_lock(&keyring_name_lock);
if (!keyring_name_hash[bucket].next)
INIT_LIST_HEAD(&keyring_name_hash[bucket]);
list_add_tail(&keyring->type_data.link,
&keyring_name_hash[bucket]);
write_unlock(&keyring_name_lock);
}
}
/*
* Initialise a keyring.
*
* Returns 0 on success, -EINVAL if given any data.
*/
static int keyring_instantiate(struct key *keyring,
const void *data, size_t datalen)
{
int ret;
ret = -EINVAL;
if (datalen == 0) {
/* make the keyring available by name if it has one */
keyring_publish_name(keyring);
ret = 0;
}
return ret;
}
/*
* Match keyrings on their name
*/
static int keyring_match(const struct key *keyring, const void *description)
{
return keyring->description &&
strcmp(keyring->description, description) == 0;
}
/*
* Clean up a keyring when it is destroyed. Unpublish its name if it had one
* and dispose of its data.
*
* The garbage collector detects the final key_put(), removes the keyring from
* the serial number tree and then does RCU synchronisation before coming here,
* so we shouldn't need to worry about code poking around here with the RCU
* readlock held by this time.
*/
static void keyring_destroy(struct key *keyring)
{
struct keyring_list *klist;
int loop;
if (keyring->description) {
write_lock(&keyring_name_lock);
if (keyring->type_data.link.next != NULL &&
!list_empty(&keyring->type_data.link))
list_del(&keyring->type_data.link);
write_unlock(&keyring_name_lock);
}
klist = rcu_access_pointer(keyring->payload.subscriptions);
if (klist) {
for (loop = klist->nkeys - 1; loop >= 0; loop--)
key_put(rcu_access_pointer(klist->keys[loop]));
kfree(klist);
}
}
/*
* Describe a keyring for /proc.
*/
static void keyring_describe(const struct key *keyring, struct seq_file *m)
{
struct keyring_list *klist;
if (keyring->description)
seq_puts(m, keyring->description);
else
seq_puts(m, "[anon]");
if (key_is_instantiated(keyring)) {
rcu_read_lock();
klist = rcu_dereference(keyring->payload.subscriptions);
if (klist)
seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
else
seq_puts(m, ": empty");
rcu_read_unlock();
}
}
/*
* Read a list of key IDs from the keyring's contents in binary form
*
* The keyring's semaphore is read-locked by the caller.
*/
static long keyring_read(const struct key *keyring,
char __user *buffer, size_t buflen)
{
struct keyring_list *klist;
struct key *key;
size_t qty, tmp;
int loop, ret;
ret = 0;
klist = rcu_dereference_locked_keyring(keyring);
if (klist) {
/* calculate how much data we could return */
qty = klist->nkeys * sizeof(key_serial_t);
if (buffer && buflen > 0) {
if (buflen > qty)
buflen = qty;
/* copy the IDs of the subscribed keys into the
* buffer */
ret = -EFAULT;
for (loop = 0; loop < klist->nkeys; loop++) {
key = rcu_deref_link_locked(klist, loop,
keyring);
tmp = sizeof(key_serial_t);
if (tmp > buflen)
tmp = buflen;
if (copy_to_user(buffer,
&key->serial,
tmp) != 0)
goto error;
buflen -= tmp;
if (buflen == 0)
break;
buffer += tmp;
}
}
ret = qty;
}
error:
return ret;
}
/*
* Allocate a keyring and link into the destination keyring.
*/
struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred, unsigned long flags,
struct key *dest)
{
struct key *keyring;
int ret;
keyring = key_alloc(&key_type_keyring, description,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
uid, gid, cred,
(KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
flags);
if (!IS_ERR(keyring)) {
[PATCH] Keys: Make request-key create an authorisation key The attached patch makes the following changes: (1) There's a new special key type called ".request_key_auth". This is an authorisation key for when one process requests a key and another process is started to construct it. This type of key cannot be created by the user; nor can it be requested by kernel services. Authorisation keys hold two references: (a) Each refers to a key being constructed. When the key being constructed is instantiated the authorisation key is revoked, rendering it of no further use. (b) The "authorising process". This is either: (i) the process that called request_key(), or: (ii) if the process that called request_key() itself had an authorisation key in its session keyring, then the authorising process referred to by that authorisation key will also be referred to by the new authorisation key. This means that the process that initiated a chain of key requests will authorise the lot of them, and will, by default, wind up with the keys obtained from them in its keyrings. (2) request_key() creates an authorisation key which is then passed to /sbin/request-key in as part of a new session keyring. (3) When request_key() is searching for a key to hand back to the caller, if it comes across an authorisation key in the session keyring of the calling process, it will also search the keyrings of the process specified therein and it will use the specified process's credentials (fsuid, fsgid, groups) to do that rather than the calling process's credentials. This allows a process started by /sbin/request-key to find keys belonging to the authorising process. (4) A key can be read, even if the process executing KEYCTL_READ doesn't have direct read or search permission if that key is contained within the keyrings of a process specified by an authorisation key found within the calling process's session keyring, and is searchable using the credentials of the authorising process. This allows a process started by /sbin/request-key to read keys belonging to the authorising process. (5) The magic KEY_SPEC_*_KEYRING key IDs when passed to KEYCTL_INSTANTIATE or KEYCTL_NEGATE will specify a keyring of the authorising process, rather than the process doing the instantiation. (6) One of the process keyrings can be nominated as the default to which request_key() should attach new keys if not otherwise specified. This is done with KEYCTL_SET_REQKEY_KEYRING and one of the KEY_REQKEY_DEFL_* constants. The current setting can also be read using this call. (7) request_key() is partially interruptible. If it is waiting for another process to finish constructing a key, it can be interrupted. This permits a request-key cycle to be broken without recourse to rebooting. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-Off-By: Benoit Boissinot <benoit.boissinot@ens-lyon.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:56 +08:00
ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
if (ret < 0) {
key_put(keyring);
keyring = ERR_PTR(ret);
}
}
return keyring;
}
/**
* keyring_search_aux - Search a keyring tree for a key matching some criteria
* @keyring_ref: A pointer to the keyring with possession indicator.
* @cred: The credentials to use for permissions checks.
* @type: The type of key to search for.
* @description: Parameter for @match.
* @match: Function to rule on whether or not a key is the one required.
* @no_state_check: Don't check if a matching key is bad
*
* Search the supplied keyring tree for a key that matches the criteria given.
* The root keyring and any linked keyrings must grant Search permission to the
* caller to be searchable and keys can only be found if they too grant Search
* to the caller. The possession flag on the root keyring pointer controls use
* of the possessor bits in permissions checking of the entire tree. In
* addition, the LSM gets to forbid keyring searches and key matches.
*
* The search is performed as a breadth-then-depth search up to the prescribed
* limit (KEYRING_SEARCH_MAX_DEPTH).
*
* Keys are matched to the type provided and are then filtered by the match
* function, which is given the description to use in any way it sees fit. The
* match function may use any attributes of a key that it wishes to to
* determine the match. Normally the match function from the key type would be
* used.
*
* RCU is used to prevent the keyring key lists from disappearing without the
* need to take lots of locks.
*
* Returns a pointer to the found key and increments the key usage count if
* successful; -EAGAIN if no matching keys were found, or if expired or revoked
* keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
* specified keyring wasn't a keyring.
*
* In the case of a successful return, the possession attribute from
* @keyring_ref is propagated to the returned key reference.
*/
key_ref_t keyring_search_aux(key_ref_t keyring_ref,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred,
struct key_type *type,
const void *description,
key_match_func_t match,
bool no_state_check)
{
struct {
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
/* Need a separate keylist pointer for RCU purposes */
struct key *keyring;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
struct keyring_list *keylist;
int kix;
} stack[KEYRING_SEARCH_MAX_DEPTH];
struct keyring_list *keylist;
struct timespec now;
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
unsigned long possessed, kflags;
struct key *keyring, *key;
key_ref_t key_ref;
long err;
int sp, nkeys, kix;
keyring = key_ref_to_ptr(keyring_ref);
possessed = is_key_possessed(keyring_ref);
key_check(keyring);
/* top keyring must have search permission to begin the search */
err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
if (err < 0) {
key_ref = ERR_PTR(err);
goto error;
}
key_ref = ERR_PTR(-ENOTDIR);
if (keyring->type != &key_type_keyring)
goto error;
rcu_read_lock();
now = current_kernel_time();
err = -EAGAIN;
sp = 0;
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
/* firstly we should check to see if this top-level keyring is what we
* are looking for */
key_ref = ERR_PTR(-EAGAIN);
kflags = keyring->flags;
if (keyring->type == type && match(keyring, description)) {
key = keyring;
if (no_state_check)
goto found;
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
/* check it isn't negative and hasn't expired or been
* revoked */
if (kflags & (1 << KEY_FLAG_REVOKED))
goto error_2;
if (key->expiry && now.tv_sec >= key->expiry)
goto error_2;
key_ref = ERR_PTR(key->type_data.reject_error);
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
if (kflags & (1 << KEY_FLAG_NEGATIVE))
goto error_2;
goto found;
}
/* otherwise, the top keyring must not be revoked, expired, or
* negatively instantiated if we are to search it */
key_ref = ERR_PTR(-EAGAIN);
if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_NEGATIVE)) ||
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
(keyring->expiry && now.tv_sec >= keyring->expiry))
goto error_2;
/* start processing a new keyring */
descend:
kflags = keyring->flags;
if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED)))
goto not_this_keyring;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
keylist = rcu_dereference(keyring->payload.subscriptions);
if (!keylist)
goto not_this_keyring;
/* iterate through the keys in this keyring first */
nkeys = keylist->nkeys;
smp_rmb();
for (kix = 0; kix < nkeys; kix++) {
key = rcu_dereference(keylist->keys[kix]);
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
kflags = key->flags;
/* ignore keys not of this type */
if (key->type != type)
continue;
/* skip invalidated, revoked and expired keys */
if (!no_state_check) {
if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED)))
continue;
if (key->expiry && now.tv_sec >= key->expiry)
continue;
}
/* keys that don't match */
if (!match(key, description))
continue;
/* key must have search permissions */
if (key_task_permission(make_key_ref(key, possessed),
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
cred, KEY_SEARCH) < 0)
continue;
if (no_state_check)
goto found;
keys: check starting keyring as part of search Check the starting keyring as part of the search to (a) see if that is what we're searching for, and (b) to check it is still valid for searching. The scenario: User in process A does things that cause things to be created in its process session keyring. The user then does an su to another user and starts a new process, B. The two processes now share the same process session keyring. Process B does an NFS access which results in an upcall to gssd. When gssd attempts to instantiate the context key (to be linked into the process session keyring), it is denied access even though it has an authorization key. The order of calls is: keyctl_instantiate_key() lookup_user_key() (the default: case) search_process_keyrings(current) search_process_keyrings(rka->context) (recursive call) keyring_search_aux() keyring_search_aux() verifies the keys and keyrings underneath the top-level keyring it is given, but that top-level keyring is neither fully validated nor checked to see if it is the thing being searched for. This patch changes keyring_search_aux() to: 1) do more validation on the top keyring it is given and 2) check whether that top-level keyring is the thing being searched for Signed-off-by: Kevin Coffman <kwc@citi.umich.edu> Signed-off-by: David Howells <dhowells@redhat.com> Cc: Paul Moore <paul.moore@hp.com> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Kevin Coffman <kwc@citi.umich.edu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:01:22 +08:00
/* we set a different error code if we pass a negative key */
if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
err = key->type_data.reject_error;
continue;
}
goto found;
}
/* search through the keyrings nested in this one */
kix = 0;
ascend:
nkeys = keylist->nkeys;
smp_rmb();
for (; kix < nkeys; kix++) {
key = rcu_dereference(keylist->keys[kix]);
if (key->type != &key_type_keyring)
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
continue;
/* recursively search nested keyrings
* - only search keyrings for which we have search permission
*/
if (sp >= KEYRING_SEARCH_MAX_DEPTH)
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
continue;
if (key_task_permission(make_key_ref(key, possessed),
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
cred, KEY_SEARCH) < 0)
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
continue;
/* stack the current position */
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
stack[sp].keyring = keyring;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
stack[sp].keylist = keylist;
stack[sp].kix = kix;
sp++;
/* begin again with the new keyring */
keyring = key;
goto descend;
}
/* the keyring we're looking at was disqualified or didn't contain a
* matching key */
not_this_keyring:
if (sp > 0) {
/* resume the processing of a keyring higher up in the tree */
sp--;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
keyring = stack[sp].keyring;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
keylist = stack[sp].keylist;
kix = stack[sp].kix + 1;
goto ascend;
}
key_ref = ERR_PTR(err);
goto error_2;
/* we found a viable match */
found:
atomic_inc(&key->usage);
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
key->last_used_at = now.tv_sec;
keyring->last_used_at = now.tv_sec;
while (sp > 0)
stack[--sp].keyring->last_used_at = now.tv_sec;
key_check(key);
key_ref = make_key_ref(key, possessed);
error_2:
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_read_unlock();
error:
return key_ref;
}
/**
* keyring_search - Search the supplied keyring tree for a matching key
* @keyring: The root of the keyring tree to be searched.
* @type: The type of keyring we want to find.
* @description: The name of the keyring we want to find.
*
* As keyring_search_aux() above, but using the current task's credentials and
* type's default matching function.
*/
key_ref_t keyring_search(key_ref_t keyring,
struct key_type *type,
const char *description)
{
[PATCH] Keys: Make request-key create an authorisation key The attached patch makes the following changes: (1) There's a new special key type called ".request_key_auth". This is an authorisation key for when one process requests a key and another process is started to construct it. This type of key cannot be created by the user; nor can it be requested by kernel services. Authorisation keys hold two references: (a) Each refers to a key being constructed. When the key being constructed is instantiated the authorisation key is revoked, rendering it of no further use. (b) The "authorising process". This is either: (i) the process that called request_key(), or: (ii) if the process that called request_key() itself had an authorisation key in its session keyring, then the authorising process referred to by that authorisation key will also be referred to by the new authorisation key. This means that the process that initiated a chain of key requests will authorise the lot of them, and will, by default, wind up with the keys obtained from them in its keyrings. (2) request_key() creates an authorisation key which is then passed to /sbin/request-key in as part of a new session keyring. (3) When request_key() is searching for a key to hand back to the caller, if it comes across an authorisation key in the session keyring of the calling process, it will also search the keyrings of the process specified therein and it will use the specified process's credentials (fsuid, fsgid, groups) to do that rather than the calling process's credentials. This allows a process started by /sbin/request-key to find keys belonging to the authorising process. (4) A key can be read, even if the process executing KEYCTL_READ doesn't have direct read or search permission if that key is contained within the keyrings of a process specified by an authorisation key found within the calling process's session keyring, and is searchable using the credentials of the authorising process. This allows a process started by /sbin/request-key to read keys belonging to the authorising process. (5) The magic KEY_SPEC_*_KEYRING key IDs when passed to KEYCTL_INSTANTIATE or KEYCTL_NEGATE will specify a keyring of the authorising process, rather than the process doing the instantiation. (6) One of the process keyrings can be nominated as the default to which request_key() should attach new keys if not otherwise specified. This is done with KEYCTL_SET_REQKEY_KEYRING and one of the KEY_REQKEY_DEFL_* constants. The current setting can also be read using this call. (7) request_key() is partially interruptible. If it is waiting for another process to finish constructing a key, it can be interrupted. This permits a request-key cycle to be broken without recourse to rebooting. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-Off-By: Benoit Boissinot <benoit.boissinot@ens-lyon.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:56 +08:00
if (!type->match)
return ERR_PTR(-ENOKEY);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
return keyring_search_aux(keyring, current->cred,
type, description, type->match, false);
}
EXPORT_SYMBOL(keyring_search);
/*
* Search the given keyring only (no recursion).
*
* The caller must guarantee that the keyring is a keyring and that the
* permission is granted to search the keyring as no check is made here.
*
* RCU is used to make it unnecessary to lock the keyring key list here.
*
* Returns a pointer to the found key with usage count incremented if
* successful and returns -ENOKEY if not found. Revoked keys and keys not
* providing the requested permission are skipped over.
*
* If successful, the possession indicator is propagated from the keyring ref
* to the returned key reference.
*/
key_ref_t __keyring_search_one(key_ref_t keyring_ref,
const struct key_type *ktype,
const char *description,
key_perm_t perm)
{
struct keyring_list *klist;
unsigned long possessed;
struct key *keyring, *key;
int nkeys, loop;
keyring = key_ref_to_ptr(keyring_ref);
possessed = is_key_possessed(keyring_ref);
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_read_lock();
klist = rcu_dereference(keyring->payload.subscriptions);
if (klist) {
nkeys = klist->nkeys;
smp_rmb();
for (loop = 0; loop < nkeys ; loop++) {
key = rcu_dereference(klist->keys[loop]);
if (key->type == ktype &&
[PATCH] Keys: Make request-key create an authorisation key The attached patch makes the following changes: (1) There's a new special key type called ".request_key_auth". This is an authorisation key for when one process requests a key and another process is started to construct it. This type of key cannot be created by the user; nor can it be requested by kernel services. Authorisation keys hold two references: (a) Each refers to a key being constructed. When the key being constructed is instantiated the authorisation key is revoked, rendering it of no further use. (b) The "authorising process". This is either: (i) the process that called request_key(), or: (ii) if the process that called request_key() itself had an authorisation key in its session keyring, then the authorising process referred to by that authorisation key will also be referred to by the new authorisation key. This means that the process that initiated a chain of key requests will authorise the lot of them, and will, by default, wind up with the keys obtained from them in its keyrings. (2) request_key() creates an authorisation key which is then passed to /sbin/request-key in as part of a new session keyring. (3) When request_key() is searching for a key to hand back to the caller, if it comes across an authorisation key in the session keyring of the calling process, it will also search the keyrings of the process specified therein and it will use the specified process's credentials (fsuid, fsgid, groups) to do that rather than the calling process's credentials. This allows a process started by /sbin/request-key to find keys belonging to the authorising process. (4) A key can be read, even if the process executing KEYCTL_READ doesn't have direct read or search permission if that key is contained within the keyrings of a process specified by an authorisation key found within the calling process's session keyring, and is searchable using the credentials of the authorising process. This allows a process started by /sbin/request-key to read keys belonging to the authorising process. (5) The magic KEY_SPEC_*_KEYRING key IDs when passed to KEYCTL_INSTANTIATE or KEYCTL_NEGATE will specify a keyring of the authorising process, rather than the process doing the instantiation. (6) One of the process keyrings can be nominated as the default to which request_key() should attach new keys if not otherwise specified. This is done with KEYCTL_SET_REQKEY_KEYRING and one of the KEY_REQKEY_DEFL_* constants. The current setting can also be read using this call. (7) request_key() is partially interruptible. If it is waiting for another process to finish constructing a key, it can be interrupted. This permits a request-key cycle to be broken without recourse to rebooting. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-Off-By: Benoit Boissinot <benoit.boissinot@ens-lyon.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:56 +08:00
(!key->type->match ||
key->type->match(key, description)) &&
key_permission(make_key_ref(key, possessed),
perm) == 0 &&
!(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED)))
)
goto found;
}
}
rcu_read_unlock();
return ERR_PTR(-ENOKEY);
found:
atomic_inc(&key->usage);
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
keyring->last_used_at = key->last_used_at =
current_kernel_time().tv_sec;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_read_unlock();
return make_key_ref(key, possessed);
}
/*
* Find a keyring with the specified name.
*
* All named keyrings in the current user namespace are searched, provided they
* grant Search permission directly to the caller (unless this check is
* skipped). Keyrings whose usage points have reached zero or who have been
* revoked are skipped.
*
* Returns a pointer to the keyring with the keyring's refcount having being
* incremented on success. -ENOKEY is returned if a key could not be found.
*/
struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
{
struct key *keyring;
int bucket;
if (!name)
KEYS: find_keyring_by_name() can gain access to a freed keyring find_keyring_by_name() can gain access to a keyring that has had its reference count reduced to zero, and is thus ready to be freed. This then allows the dead keyring to be brought back into use whilst it is being destroyed. The following timeline illustrates the process: |(cleaner) (user) | | free_user(user) sys_keyctl() | | | | key_put(user->session_keyring) keyctl_get_keyring_ID() | || //=> keyring->usage = 0 | | |schedule_work(&key_cleanup_task) lookup_user_key() | || | | kmem_cache_free(,user) | | . |[KEY_SPEC_USER_KEYRING] | . install_user_keyrings() | . || | key_cleanup() [<= worker_thread()] || | | || | [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] | | || | atomic_read() == 0 || | |{ rb_ease(&key->serial_node,) } || | | || | [spin_unlock(&key_serial_lock)] |find_keyring_by_name() | | ||| | keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] | || ||| | |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) | |. ||| *** GET freeing keyring *** | |. ||[read_unlock(&keyring_name_lock)] | || || | |list_del() |[mutex_unlock(&key_user_k..mutex)] | || | | |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** | | . | kmem_cache_free(,keyring) . | . | atomic_dec(&keyring->usage) v *** DESTROYED *** TIME If CONFIG_SLUB_DEBUG=y then we may see the following message generated: ============================================================================= BUG key_jar: Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk Alternatively, we may see a system panic happen, such as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 PGD 6b2b4067 PUD 6a80d067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/kernel/kexec_crash_loaded CPU 1 ... Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) Stack: 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 Call Trace: [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f [<ffffffff810face3>] ? do_filp_open+0x145/0x590 [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33 [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2 [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d [<ffffffff81103916>] ? alloc_fd+0x69/0x10e [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 This problem is that find_keyring_by_name does not confirm that the keyring is valid before accepting it. Skipping keyrings that have been reduced to a zero count seems the way to go. To this end, use atomic_inc_not_zero() to increment the usage count and skip the candidate keyring if that returns false. The following script _may_ cause the bug to happen, but there's no guarantee as the window of opportunity is small: #!/bin/sh LOOP=100000 USER=dummy_user /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } for ((i=0; i<LOOP; i++)) do /bin/su -c "echo '$i' > /dev/null" $USER done (( add == 1 )) && /usr/sbin/userdel -r $USER exit Note that the nominated user must not be in use. An alternative way of testing this may be: for ((i=0; i<100000; i++)) do keyctl session foo /bin/true || break done >&/dev/null as that uses a keyring named "foo" rather than relying on the user and user-session named keyrings. Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2010-04-30 21:32:13 +08:00
return ERR_PTR(-EINVAL);
bucket = keyring_hash(name);
read_lock(&keyring_name_lock);
if (keyring_name_hash[bucket].next) {
/* search this hash bucket for a keyring with a matching name
* that's readable and that hasn't been revoked */
list_for_each_entry(keyring,
&keyring_name_hash[bucket],
type_data.link
) {
if (keyring->user->user_ns != current_user_ns())
continue;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
continue;
if (strcmp(keyring->description, name) != 0)
continue;
if (!skip_perm_check &&
key_permission(make_key_ref(keyring, 0),
KEY_SEARCH) < 0)
continue;
KEYS: find_keyring_by_name() can gain access to a freed keyring find_keyring_by_name() can gain access to a keyring that has had its reference count reduced to zero, and is thus ready to be freed. This then allows the dead keyring to be brought back into use whilst it is being destroyed. The following timeline illustrates the process: |(cleaner) (user) | | free_user(user) sys_keyctl() | | | | key_put(user->session_keyring) keyctl_get_keyring_ID() | || //=> keyring->usage = 0 | | |schedule_work(&key_cleanup_task) lookup_user_key() | || | | kmem_cache_free(,user) | | . |[KEY_SPEC_USER_KEYRING] | . install_user_keyrings() | . || | key_cleanup() [<= worker_thread()] || | | || | [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] | | || | atomic_read() == 0 || | |{ rb_ease(&key->serial_node,) } || | | || | [spin_unlock(&key_serial_lock)] |find_keyring_by_name() | | ||| | keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] | || ||| | |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) | |. ||| *** GET freeing keyring *** | |. ||[read_unlock(&keyring_name_lock)] | || || | |list_del() |[mutex_unlock(&key_user_k..mutex)] | || | | |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** | | . | kmem_cache_free(,keyring) . | . | atomic_dec(&keyring->usage) v *** DESTROYED *** TIME If CONFIG_SLUB_DEBUG=y then we may see the following message generated: ============================================================================= BUG key_jar: Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk Alternatively, we may see a system panic happen, such as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 PGD 6b2b4067 PUD 6a80d067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/kernel/kexec_crash_loaded CPU 1 ... Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) Stack: 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 Call Trace: [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f [<ffffffff810face3>] ? do_filp_open+0x145/0x590 [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33 [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2 [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d [<ffffffff81103916>] ? alloc_fd+0x69/0x10e [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 This problem is that find_keyring_by_name does not confirm that the keyring is valid before accepting it. Skipping keyrings that have been reduced to a zero count seems the way to go. To this end, use atomic_inc_not_zero() to increment the usage count and skip the candidate keyring if that returns false. The following script _may_ cause the bug to happen, but there's no guarantee as the window of opportunity is small: #!/bin/sh LOOP=100000 USER=dummy_user /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } for ((i=0; i<LOOP; i++)) do /bin/su -c "echo '$i' > /dev/null" $USER done (( add == 1 )) && /usr/sbin/userdel -r $USER exit Note that the nominated user must not be in use. An alternative way of testing this may be: for ((i=0; i<100000; i++)) do keyctl session foo /bin/true || break done >&/dev/null as that uses a keyring named "foo" rather than relying on the user and user-session named keyrings. Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2010-04-30 21:32:13 +08:00
/* we've got a match but we might end up racing with
* key_cleanup() if the keyring is currently 'dead'
* (ie. it has a zero usage count) */
if (!atomic_inc_not_zero(&keyring->usage))
continue;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
keyring->last_used_at = current_kernel_time().tv_sec;
KEYS: find_keyring_by_name() can gain access to a freed keyring find_keyring_by_name() can gain access to a keyring that has had its reference count reduced to zero, and is thus ready to be freed. This then allows the dead keyring to be brought back into use whilst it is being destroyed. The following timeline illustrates the process: |(cleaner) (user) | | free_user(user) sys_keyctl() | | | | key_put(user->session_keyring) keyctl_get_keyring_ID() | || //=> keyring->usage = 0 | | |schedule_work(&key_cleanup_task) lookup_user_key() | || | | kmem_cache_free(,user) | | . |[KEY_SPEC_USER_KEYRING] | . install_user_keyrings() | . || | key_cleanup() [<= worker_thread()] || | | || | [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] | | || | atomic_read() == 0 || | |{ rb_ease(&key->serial_node,) } || | | || | [spin_unlock(&key_serial_lock)] |find_keyring_by_name() | | ||| | keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] | || ||| | |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) | |. ||| *** GET freeing keyring *** | |. ||[read_unlock(&keyring_name_lock)] | || || | |list_del() |[mutex_unlock(&key_user_k..mutex)] | || | | |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** | | . | kmem_cache_free(,keyring) . | . | atomic_dec(&keyring->usage) v *** DESTROYED *** TIME If CONFIG_SLUB_DEBUG=y then we may see the following message generated: ============================================================================= BUG key_jar: Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk Alternatively, we may see a system panic happen, such as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 PGD 6b2b4067 PUD 6a80d067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/kernel/kexec_crash_loaded CPU 1 ... Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) Stack: 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 Call Trace: [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f [<ffffffff810face3>] ? do_filp_open+0x145/0x590 [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33 [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2 [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d [<ffffffff81103916>] ? alloc_fd+0x69/0x10e [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 This problem is that find_keyring_by_name does not confirm that the keyring is valid before accepting it. Skipping keyrings that have been reduced to a zero count seems the way to go. To this end, use atomic_inc_not_zero() to increment the usage count and skip the candidate keyring if that returns false. The following script _may_ cause the bug to happen, but there's no guarantee as the window of opportunity is small: #!/bin/sh LOOP=100000 USER=dummy_user /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } for ((i=0; i<LOOP; i++)) do /bin/su -c "echo '$i' > /dev/null" $USER done (( add == 1 )) && /usr/sbin/userdel -r $USER exit Note that the nominated user must not be in use. An alternative way of testing this may be: for ((i=0; i<100000; i++)) do keyctl session foo /bin/true || break done >&/dev/null as that uses a keyring named "foo" rather than relying on the user and user-session named keyrings. Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2010-04-30 21:32:13 +08:00
goto out;
}
}
keyring = ERR_PTR(-ENOKEY);
KEYS: find_keyring_by_name() can gain access to a freed keyring find_keyring_by_name() can gain access to a keyring that has had its reference count reduced to zero, and is thus ready to be freed. This then allows the dead keyring to be brought back into use whilst it is being destroyed. The following timeline illustrates the process: |(cleaner) (user) | | free_user(user) sys_keyctl() | | | | key_put(user->session_keyring) keyctl_get_keyring_ID() | || //=> keyring->usage = 0 | | |schedule_work(&key_cleanup_task) lookup_user_key() | || | | kmem_cache_free(,user) | | . |[KEY_SPEC_USER_KEYRING] | . install_user_keyrings() | . || | key_cleanup() [<= worker_thread()] || | | || | [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] | | || | atomic_read() == 0 || | |{ rb_ease(&key->serial_node,) } || | | || | [spin_unlock(&key_serial_lock)] |find_keyring_by_name() | | ||| | keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] | || ||| | |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) | |. ||| *** GET freeing keyring *** | |. ||[read_unlock(&keyring_name_lock)] | || || | |list_del() |[mutex_unlock(&key_user_k..mutex)] | || | | |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** | | . | kmem_cache_free(,keyring) . | . | atomic_dec(&keyring->usage) v *** DESTROYED *** TIME If CONFIG_SLUB_DEBUG=y then we may see the following message generated: ============================================================================= BUG key_jar: Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk Alternatively, we may see a system panic happen, such as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 PGD 6b2b4067 PUD 6a80d067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/kernel/kexec_crash_loaded CPU 1 ... Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) Stack: 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 Call Trace: [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f [<ffffffff810face3>] ? do_filp_open+0x145/0x590 [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33 [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2 [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d [<ffffffff81103916>] ? alloc_fd+0x69/0x10e [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9 This problem is that find_keyring_by_name does not confirm that the keyring is valid before accepting it. Skipping keyrings that have been reduced to a zero count seems the way to go. To this end, use atomic_inc_not_zero() to increment the usage count and skip the candidate keyring if that returns false. The following script _may_ cause the bug to happen, but there's no guarantee as the window of opportunity is small: #!/bin/sh LOOP=100000 USER=dummy_user /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } for ((i=0; i<LOOP; i++)) do /bin/su -c "echo '$i' > /dev/null" $USER done (( add == 1 )) && /usr/sbin/userdel -r $USER exit Note that the nominated user must not be in use. An alternative way of testing this may be: for ((i=0; i<100000; i++)) do keyctl session foo /bin/true || break done >&/dev/null as that uses a keyring named "foo" rather than relying on the user and user-session named keyrings. Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2010-04-30 21:32:13 +08:00
out:
read_unlock(&keyring_name_lock);
return keyring;
}
/*
* See if a cycle will will be created by inserting acyclic tree B in acyclic
* tree A at the topmost level (ie: as a direct child of A).
*
* Since we are adding B to A at the top level, checking for cycles should just
* be a matter of seeing if node A is somewhere in tree B.
*/
static int keyring_detect_cycle(struct key *A, struct key *B)
{
struct {
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
struct keyring_list *keylist;
int kix;
} stack[KEYRING_SEARCH_MAX_DEPTH];
struct keyring_list *keylist;
struct key *subtree, *key;
int sp, nkeys, kix, ret;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_read_lock();
ret = -EDEADLK;
if (A == B)
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
goto cycle_detected;
subtree = B;
sp = 0;
/* start processing a new keyring */
descend:
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
goto not_this_keyring;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
keylist = rcu_dereference(subtree->payload.subscriptions);
if (!keylist)
goto not_this_keyring;
kix = 0;
ascend:
/* iterate through the remaining keys in this keyring */
nkeys = keylist->nkeys;
smp_rmb();
for (; kix < nkeys; kix++) {
key = rcu_dereference(keylist->keys[kix]);
if (key == A)
goto cycle_detected;
/* recursively check nested keyrings */
if (key->type == &key_type_keyring) {
if (sp >= KEYRING_SEARCH_MAX_DEPTH)
goto too_deep;
/* stack the current position */
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
stack[sp].keylist = keylist;
stack[sp].kix = kix;
sp++;
/* begin again with the new keyring */
subtree = key;
goto descend;
}
}
/* the keyring we're looking at was disqualified or didn't contain a
* matching key */
not_this_keyring:
if (sp > 0) {
/* resume the checking of a keyring higher up in the tree */
sp--;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
keylist = stack[sp].keylist;
kix = stack[sp].kix + 1;
goto ascend;
}
ret = 0; /* no cycles detected */
error:
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_read_unlock();
return ret;
too_deep:
ret = -ELOOP;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
goto error;
cycle_detected:
ret = -EDEADLK;
goto error;
}
/*
* Dispose of a keyring list after the RCU grace period, freeing the unlinked
* key
*/
static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
{
struct keyring_list *klist =
container_of(rcu, struct keyring_list, rcu);
if (klist->delkey != USHRT_MAX)
key_put(rcu_access_pointer(klist->keys[klist->delkey]));
kfree(klist);
}
/*
* Preallocate memory so that a key can be linked into to a keyring.
*/
int __key_link_begin(struct key *keyring, const struct key_type *type,
const char *description, unsigned long *_prealloc)
__acquires(&keyring->sem)
{
struct keyring_list *klist, *nklist;
unsigned long prealloc;
unsigned max;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
time_t lowest_lru;
size_t size;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
int loop, lru, ret;
kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
if (keyring->type != &key_type_keyring)
return -ENOTDIR;
down_write(&keyring->sem);
ret = -EKEYREVOKED;
if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
goto error_krsem;
/* serialise link/link calls to prevent parallel calls causing a cycle
* when linking two keyring in opposite orders */
if (type == &key_type_keyring)
down_write(&keyring_serialise_link_sem);
klist = rcu_dereference_locked_keyring(keyring);
/* see if there's a matching key we can displace */
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
lru = -1;
if (klist && klist->nkeys > 0) {
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
lowest_lru = TIME_T_MAX;
for (loop = klist->nkeys - 1; loop >= 0; loop--) {
struct key *key = rcu_deref_link_locked(klist, loop,
keyring);
if (key->type == type &&
strcmp(key->description, description) == 0) {
/* Found a match - we'll replace the link with
* one to the new key. We record the slot
* position.
*/
klist->delkey = loop;
prealloc = 0;
goto done;
}
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
if (key->last_used_at < lowest_lru) {
lowest_lru = key->last_used_at;
lru = loop;
}
}
}
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
/* If the keyring is full then do an LRU discard */
if (klist &&
klist->nkeys == klist->maxkeys &&
klist->maxkeys >= MAX_KEYRING_LINKS) {
kdebug("LRU discard %d\n", lru);
klist->delkey = lru;
prealloc = 0;
goto done;
}
/* check that we aren't going to overrun the user's quota */
ret = key_payload_reserve(keyring,
keyring->datalen + KEYQUOTA_LINK_BYTES);
if (ret < 0)
goto error_sem;
if (klist && klist->nkeys < klist->maxkeys) {
/* there's sufficient slack space to append directly */
klist->delkey = klist->nkeys;
prealloc = KEY_LINK_FIXQUOTA;
} else {
/* grow the key list */
max = 4;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
if (klist) {
max += klist->maxkeys;
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
if (max > MAX_KEYRING_LINKS)
max = MAX_KEYRING_LINKS;
BUG_ON(max <= klist->maxkeys);
}
size = sizeof(*klist) + sizeof(struct key *) * max;
ret = -ENOMEM;
nklist = kmalloc(size, GFP_KERNEL);
if (!nklist)
goto error_quota;
nklist->maxkeys = max;
if (klist) {
memcpy(nklist->keys, klist->keys,
sizeof(struct key *) * klist->nkeys);
nklist->delkey = klist->nkeys;
nklist->nkeys = klist->nkeys + 1;
klist->delkey = USHRT_MAX;
} else {
nklist->nkeys = 1;
nklist->delkey = 0;
}
/* add the key into the new space */
RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
}
done:
*_prealloc = prealloc;
kleave(" = 0");
return 0;
error_quota:
/* undo the quota changes */
key_payload_reserve(keyring,
keyring->datalen - KEYQUOTA_LINK_BYTES);
error_sem:
if (type == &key_type_keyring)
up_write(&keyring_serialise_link_sem);
error_krsem:
up_write(&keyring->sem);
kleave(" = %d", ret);
return ret;
}
/*
* Check already instantiated keys aren't going to be a problem.
*
* The caller must have called __key_link_begin(). Don't need to call this for
* keys that were created since __key_link_begin() was called.
*/
int __key_link_check_live_key(struct key *keyring, struct key *key)
{
if (key->type == &key_type_keyring)
/* check that we aren't going to create a cycle by linking one
* keyring to another */
return keyring_detect_cycle(keyring, key);
return 0;
}
/*
* Link a key into to a keyring.
*
* Must be called with __key_link_begin() having being called. Discards any
* already extant link to matching key if there is one, so that each keyring
* holds at most one link to any given key of a particular type+description
* combination.
*/
void __key_link(struct key *keyring, struct key *key,
unsigned long *_prealloc)
{
struct keyring_list *klist, *nklist;
struct key *discard;
nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
*_prealloc = 0;
kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
klist = rcu_dereference_locked_keyring(keyring);
atomic_inc(&key->usage);
KEYS: Do LRU discard in full keyrings Do an LRU discard in keyrings that are full rather than returning ENFILE. To perform this, a time_t is added to the key struct and updated by the creation of a link to a key and by a key being found as the result of a search. At the completion of a successful search, the keyrings in the path between the root of the search and the first found link to it also have their last-used times updated. Note that discarding a link to a key from a keyring does not necessarily destroy the key as there may be references held by other places. An alternate discard method that might suffice is to perform FIFO discard from the keyring, using the spare 2-byte hole in the keylist header as the index of the next link to be discarded. This is useful when using a keyring as a cache for DNS results or foreign filesystem IDs. This can be tested by the following. As root do: echo 1000 >/proc/sys/kernel/keys/root_maxkeys kr=`keyctl newring foo @s` for ((i=0; i<2000; i++)); do keyctl add user a$i a $kr; done Without this patch ENFILE should be reported when the keyring fills up. With this patch, the keyring discards keys in an LRU fashion. Note that the stored LRU time has a granularity of 1s. After doing this, /proc/key-users can be observed and should show that most of the 2000 keys have been discarded: [root@andromeda ~]# cat /proc/key-users 0: 517 516/516 513/1000 5249/20000 The "513/1000" here is the number of quota-accounted keys present for this user out of the maximum permitted. In /proc/keys, the keyring shows the number of keys it has and the number of slots it has allocated: [root@andromeda ~]# grep foo /proc/keys 200c64c4 I--Q-- 1 perm 3b3f0000 0 0 keyring foo: 509/509 The maximum is (PAGE_SIZE - header) / key pointer size. That's typically 509 on a 64-bit system and 1020 on a 32-bit system. Signed-off-by: David Howells <dhowells@redhat.com>
2012-05-11 17:56:56 +08:00
keyring->last_used_at = key->last_used_at =
current_kernel_time().tv_sec;
/* there's a matching key we can displace or an empty slot in a newly
* allocated list we can fill */
if (nklist) {
kdebug("reissue %hu/%hu/%hu",
nklist->delkey, nklist->nkeys, nklist->maxkeys);
RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
rcu_assign_pointer(keyring->payload.subscriptions, nklist);
/* dispose of the old keyring list and, if there was one, the
* displaced key */
if (klist) {
kdebug("dispose %hu/%hu/%hu",
klist->delkey, klist->nkeys, klist->maxkeys);
call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
}
} else if (klist->delkey < klist->nkeys) {
kdebug("replace %hu/%hu/%hu",
klist->delkey, klist->nkeys, klist->maxkeys);
discard = rcu_dereference_protected(
klist->keys[klist->delkey],
rwsem_is_locked(&keyring->sem));
rcu_assign_pointer(klist->keys[klist->delkey], key);
/* The garbage collector will take care of RCU
* synchronisation */
key_put(discard);
} else {
/* there's sufficient slack space to append directly */
kdebug("append %hu/%hu/%hu",
klist->delkey, klist->nkeys, klist->maxkeys);
RCU_INIT_POINTER(klist->keys[klist->delkey], key);
smp_wmb();
klist->nkeys++;
}
}
/*
* Finish linking a key into to a keyring.
*
* Must be called with __key_link_begin() having being called.
*/
void __key_link_end(struct key *keyring, struct key_type *type,
unsigned long prealloc)
__releases(&keyring->sem)
{
BUG_ON(type == NULL);
BUG_ON(type->name == NULL);
kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
if (type == &key_type_keyring)
up_write(&keyring_serialise_link_sem);
if (prealloc) {
if (prealloc & KEY_LINK_FIXQUOTA)
key_payload_reserve(keyring,
keyring->datalen -
KEYQUOTA_LINK_BYTES);
kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
}
up_write(&keyring->sem);
}
/**
* key_link - Link a key to a keyring
* @keyring: The keyring to make the link in.
* @key: The key to link to.
*
* Make a link in a keyring to a key, such that the keyring holds a reference
* on that key and the key can potentially be found by searching that keyring.
*
* This function will write-lock the keyring's semaphore and will consume some
* of the user's key data quota to hold the link.
*
* Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring,
* -EKEYREVOKED if the keyring has been revoked, -ENFILE if the keyring is
* full, -EDQUOT if there is insufficient key data quota remaining to add
* another link or -ENOMEM if there's insufficient memory.
*
* It is assumed that the caller has checked that it is permitted for a link to
* be made (the keyring should have Write permission and the key Link
* permission).
*/
int key_link(struct key *keyring, struct key *key)
{
unsigned long prealloc;
int ret;
key_check(keyring);
key_check(key);
ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
if (ret == 0) {
ret = __key_link_check_live_key(keyring, key);
if (ret == 0)
__key_link(keyring, key, &prealloc);
__key_link_end(keyring, key->type, prealloc);
}
return ret;
}
EXPORT_SYMBOL(key_link);
/**
* key_unlink - Unlink the first link to a key from a keyring.
* @keyring: The keyring to remove the link from.
* @key: The key the link is to.
*
* Remove a link from a keyring to a key.
*
* This function will write-lock the keyring's semaphore.
*
* Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring, -ENOENT if
* the key isn't linked to by the keyring or -ENOMEM if there's insufficient
* memory.
*
* It is assumed that the caller has checked that it is permitted for a link to
* be removed (the keyring should have Write permission; no permissions are
* required on the key).
*/
int key_unlink(struct key *keyring, struct key *key)
{
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
struct keyring_list *klist, *nklist;
int loop, ret;
key_check(keyring);
key_check(key);
ret = -ENOTDIR;
if (keyring->type != &key_type_keyring)
goto error;
down_write(&keyring->sem);
klist = rcu_dereference_locked_keyring(keyring);
if (klist) {
/* search the keyring for the key */
for (loop = 0; loop < klist->nkeys; loop++)
if (rcu_access_pointer(klist->keys[loop]) == key)
goto key_is_present;
}
up_write(&keyring->sem);
ret = -ENOENT;
goto error;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
key_is_present:
/* we need to copy the key list for RCU purposes */
nklist = kmalloc(sizeof(*klist) +
sizeof(struct key *) * klist->maxkeys,
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
GFP_KERNEL);
if (!nklist)
goto nomem;
nklist->maxkeys = klist->maxkeys;
nklist->nkeys = klist->nkeys - 1;
if (loop > 0)
memcpy(&nklist->keys[0],
&klist->keys[0],
loop * sizeof(struct key *));
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
if (loop < nklist->nkeys)
memcpy(&nklist->keys[loop],
&klist->keys[loop + 1],
(nklist->nkeys - loop) * sizeof(struct key *));
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
/* adjust the user's quota */
key_payload_reserve(keyring,
keyring->datalen - KEYQUOTA_LINK_BYTES);
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_assign_pointer(keyring->payload.subscriptions, nklist);
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
up_write(&keyring->sem);
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
/* schedule for later cleanup */
klist->delkey = loop;
call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
ret = 0;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
error:
return ret;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
nomem:
ret = -ENOMEM;
up_write(&keyring->sem);
goto error;
}
EXPORT_SYMBOL(key_unlink);
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
/*
* Dispose of a keyring list after the RCU grace period, releasing the keys it
* links to.
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
*/
static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
{
struct keyring_list *klist;
int loop;
klist = container_of(rcu, struct keyring_list, rcu);
for (loop = klist->nkeys - 1; loop >= 0; loop--)
key_put(rcu_access_pointer(klist->keys[loop]));
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
kfree(klist);
}
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
/**
* keyring_clear - Clear a keyring
* @keyring: The keyring to clear.
*
* Clear the contents of the specified keyring.
*
* Returns 0 if successful or -ENOTDIR if the keyring isn't a keyring.
*/
int keyring_clear(struct key *keyring)
{
struct keyring_list *klist;
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
int ret;
ret = -ENOTDIR;
if (keyring->type == &key_type_keyring) {
/* detach the pointer block with the locks held */
down_write(&keyring->sem);
klist = rcu_dereference_locked_keyring(keyring);
if (klist) {
/* adjust the quota */
key_payload_reserve(keyring,
sizeof(struct keyring_list));
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
rcu_assign_pointer(keyring->payload.subscriptions,
NULL);
}
up_write(&keyring->sem);
/* free the keys after the locks have been dropped */
[PATCH] keys: Discard key spinlock and use RCU for key payload The attached patch changes the key implementation in a number of ways: (1) It removes the spinlock from the key structure. (2) The key flags are now accessed using atomic bitops instead of write-locking the key spinlock and using C bitwise operators. The three instantiation flags are dealt with with the construction semaphore held during the request_key/instantiate/negate sequence, thus rendering the spinlock superfluous. The key flags are also now bit numbers not bit masks. (3) The key payload is now accessed using RCU. This permits the recursive keyring search algorithm to be simplified greatly since no locks need be taken other than the usual RCU preemption disablement. Searching now does not require any locks or semaphores to be held; merely that the starting keyring be pinned. (4) The keyring payload now includes an RCU head so that it can be disposed of by call_rcu(). This requires that the payload be copied on unlink to prevent introducing races in copy-down vs search-up. (5) The user key payload is now a structure with the data following it. It includes an RCU head like the keyring payload and for the same reason. It also contains a data length because the data length in the key may be changed on another CPU whilst an RCU protected read is in progress on the payload. This would then see the supposed RCU payload and the on-key data length getting out of sync. I'm tempted to drop the key's datalen entirely, except that it's used in conjunction with quota management and so is a little tricky to get rid of. (6) Update the keys documentation. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 13:00:49 +08:00
if (klist)
call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
ret = 0;
}
return ret;
}
EXPORT_SYMBOL(keyring_clear);
/*
* Dispose of the links from a revoked keyring.
*
* This is called with the key sem write-locked.
*/
static void keyring_revoke(struct key *keyring)
{
struct keyring_list *klist;
klist = rcu_dereference_locked_keyring(keyring);
/* adjust the quota */
key_payload_reserve(keyring, 0);
if (klist) {
rcu_assign_pointer(keyring->payload.subscriptions, NULL);
call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
}
}
/*
* Collect garbage from the contents of a keyring, replacing the old list with
* a new one with the pointers all shuffled down.
*
* Dead keys are classed as oned that are flagged as being dead or are revoked,
* expired or negative keys that were revoked or expired before the specified
* limit.
*/
void keyring_gc(struct key *keyring, time_t limit)
{
struct keyring_list *klist, *new;
struct key *key;
int loop, keep, max;
kenter("{%x,%s}", key_serial(keyring), keyring->description);
down_write(&keyring->sem);
klist = rcu_dereference_locked_keyring(keyring);
if (!klist)
goto no_klist;
/* work out how many subscriptions we're keeping */
keep = 0;
for (loop = klist->nkeys - 1; loop >= 0; loop--)
if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
limit))
keep++;
if (keep == klist->nkeys)
goto just_return;
/* allocate a new keyring payload */
max = roundup(keep, 4);
new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
GFP_KERNEL);
if (!new)
goto nomem;
new->maxkeys = max;
new->nkeys = 0;
new->delkey = 0;
/* install the live keys
* - must take care as expired keys may be updated back to life
*/
keep = 0;
for (loop = klist->nkeys - 1; loop >= 0; loop--) {
key = rcu_deref_link_locked(klist, loop, keyring);
if (!key_is_dead(key, limit)) {
if (keep >= max)
goto discard_new;
RCU_INIT_POINTER(new->keys[keep++], key_get(key));
}
}
new->nkeys = keep;
/* adjust the quota */
key_payload_reserve(keyring,
sizeof(struct keyring_list) +
KEYQUOTA_LINK_BYTES * keep);
if (keep == 0) {
rcu_assign_pointer(keyring->payload.subscriptions, NULL);
kfree(new);
} else {
rcu_assign_pointer(keyring->payload.subscriptions, new);
}
up_write(&keyring->sem);
call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
kleave(" [yes]");
return;
discard_new:
new->nkeys = keep;
keyring_clear_rcu_disposal(&new->rcu);
up_write(&keyring->sem);
kleave(" [discard]");
return;
just_return:
up_write(&keyring->sem);
kleave(" [no dead]");
return;
no_klist:
up_write(&keyring->sem);
kleave(" [no_klist]");
return;
nomem:
up_write(&keyring->sem);
kleave(" [oom]");
}