OpenCloudOS-Kernel/include/linux/security.h

3042 lines
110 KiB
C
Raw Normal View History

/*
* Linux Security plug
*
* Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com>
* Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com>
* Copyright (C) 2001 James Morris <jmorris@intercode.com.au>
* Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Due to this file being licensed under the GPL there is controversy over
* whether this permits you to write a module that #includes this file
* without placing your module under the GPL. Please consult a lawyer for
* advice before doing this.
*
*/
#ifndef __LINUX_SECURITY_H
#define __LINUX_SECURITY_H
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/binfmts.h>
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/sem.h>
#include <linux/shm.h>
#include <linux/mm.h> /* PAGE_ALIGN */
#include <linux/msg.h>
#include <linux/sched.h>
#include <linux/key.h>
#include <linux/xfrm.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <net/flow.h>
/* Maximum number of letters for an LSM name string */
#define SECURITY_NAME_MAX 10
/* If capable should audit the security request */
#define SECURITY_CAP_NOAUDIT 0
#define SECURITY_CAP_AUDIT 1
struct ctl_table;
struct audit_krule;
/*
* These functions are in security/capability.c and are used
* as the default capabilities functions
*/
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
extern int cap_settime(struct timespec *ts, struct timezone *tz);
extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
extern int cap_ptrace_traceme(struct task_struct *parent);
extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
extern int cap_capset(struct cred *new, const struct cred *old,
const kernel_cap_t *effective,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
extern int cap_bprm_set_creds(struct linux_binprm *bprm);
extern int cap_bprm_secureexec(struct linux_binprm *bprm);
extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
extern int cap_inode_need_killpriv(struct dentry *dentry);
extern int cap_inode_killpriv(struct dentry *dentry);
extern int cap_file_mmap(struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags,
unsigned long addr, unsigned long addr_only);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
capabilities: implement per-process securebits Filesystem capability support makes it possible to do away with (set)uid-0 based privilege and use capabilities instead. That is, with filesystem support for capabilities but without this present patch, it is (conceptually) possible to manage a system with capabilities alone and never need to obtain privilege via (set)uid-0. Of course, conceptually isn't quite the same as currently possible since few user applications, certainly not enough to run a viable system, are currently prepared to leverage capabilities to exercise privilege. Further, many applications exist that may never get upgraded in this way, and the kernel will continue to want to support their setuid-0 base privilege needs. Where pure-capability applications evolve and replace setuid-0 binaries, it is desirable that there be a mechanisms by which they can contain their privilege. In addition to leveraging the per-process bounding and inheritable sets, this should include suppressing the privilege of the uid-0 superuser from the process' tree of children. The feature added by this patch can be leveraged to suppress the privilege associated with (set)uid-0. This suppression requires CAP_SETPCAP to initiate, and only immediately affects the 'current' process (it is inherited through fork()/exec()). This reimplementation differs significantly from the historical support for securebits which was system-wide, unwieldy and which has ultimately withered to a dead relic in the source of the modern kernel. With this patch applied a process, that is capable(CAP_SETPCAP), can now drop all legacy privilege (through uid=0) for itself and all subsequently fork()'d/exec()'d children with: prctl(PR_SET_SECUREBITS, 0x2f); This patch represents a no-op unless CONFIG_SECURITY_FILE_CAPABILITIES is enabled at configure time. [akpm@linux-foundation.org: fix uninitialised var warning] [serue@us.ibm.com: capabilities: use cap_task_prctl when !CONFIG_SECURITY] Signed-off-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Reviewed-by: James Morris <jmorris@namei.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: Paul Moore <paul.moore@hp.com> Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 17:13:40 +08:00
extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
unsigned long arg4, unsigned long arg5);
extern int cap_task_setscheduler(struct task_struct *p);
extern int cap_task_setioprio(struct task_struct *p, int ioprio);
extern int cap_task_setnice(struct task_struct *p, int nice);
extern int cap_vm_enough_memory(struct mm_struct *mm, long pages);
struct msghdr;
struct sk_buff;
struct sock;
struct sockaddr;
struct socket;
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
struct flowi;
struct dst_entry;
struct xfrm_selector;
struct xfrm_policy;
struct xfrm_state;
struct xfrm_user_sec_ctx;
struct seq_file;
extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
extern int cap_netlink_recv(struct sk_buff *skb, int cap);
void reset_security_ops(void);
#ifdef CONFIG_MMU
extern unsigned long mmap_min_addr;
extern unsigned long dac_mmap_min_addr;
#else
#define dac_mmap_min_addr 0UL
#endif
/*
* Values used in the task_security_ops calls
*/
/* setuid or setgid, id0 == uid or gid */
#define LSM_SETID_ID 1
/* setreuid or setregid, id0 == real, id1 == eff */
#define LSM_SETID_RE 2
/* setresuid or setresgid, id0 == real, id1 == eff, uid2 == saved */
#define LSM_SETID_RES 4
/* setfsuid or setfsgid, id0 == fsuid or fsgid */
#define LSM_SETID_FS 8
/* forward declares to avoid warnings */
struct sched_param;
struct request_sock;
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
/* bprm->unsafe reasons */
#define LSM_UNSAFE_SHARE 1
#define LSM_UNSAFE_PTRACE 2
#define LSM_UNSAFE_PTRACE_CAP 4
#ifdef CONFIG_MMU
/*
* If a hint addr is less than mmap_min_addr change hint to be as
* low as possible but still greater than mmap_min_addr
*/
static inline unsigned long round_hint_to_min(unsigned long hint)
{
hint &= PAGE_MASK;
if (((void *)hint != NULL) &&
(hint < mmap_min_addr))
return PAGE_ALIGN(mmap_min_addr);
return hint;
}
extern int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
#ifdef CONFIG_SECURITY
struct security_mnt_opts {
char **mnt_opts;
int *mnt_opts_flags;
int num_mnt_opts;
};
static inline void security_init_mnt_opts(struct security_mnt_opts *opts)
{
opts->mnt_opts = NULL;
opts->mnt_opts_flags = NULL;
opts->num_mnt_opts = 0;
}
static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
{
int i;
if (opts->mnt_opts)
for (i = 0; i < opts->num_mnt_opts; i++)
kfree(opts->mnt_opts[i]);
kfree(opts->mnt_opts);
opts->mnt_opts = NULL;
kfree(opts->mnt_opts_flags);
opts->mnt_opts_flags = NULL;
opts->num_mnt_opts = 0;
}
/**
* struct security_operations - main security structure
*
* Security module identifier.
*
* @name:
* A string that acts as a unique identifeir for the LSM with max number
* of characters = SECURITY_NAME_MAX.
*
* Security hooks for program execution operations.
*
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
* @bprm_set_creds:
* Save security information in the bprm->security field, typically based
* on information about the bprm->file, for later use by the apply_creds
* hook. This hook may also optionally check permissions (e.g. for
* transitions between security domains).
* This hook may be called multiple times during a single execve, e.g. for
* interpreters. The hook can tell whether it has already been called by
* checking to see if @bprm->security is non-NULL. If so, then the hook
* may decide either to retain the security information saved earlier or
* to replace it.
* @bprm contains the linux_binprm structure.
* Return 0 if the hook is successful and permission is granted.
* @bprm_check_security:
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
* This hook mediates the point when a search for a binary handler will
* begin. It allows a check the @bprm->security value which is set in the
* preceding set_creds call. The primary difference from set_creds is
* that the argv list and envp list are reliably available in @bprm. This
* hook may be called multiple times during a single execve; and in each
* pass set_creds is called first.
* @bprm contains the linux_binprm structure.
* Return 0 if the hook is successful and permission is granted.
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
* @bprm_committing_creds:
* Prepare to install the new security attributes of a process being
* transformed by an execve operation, based on the old credentials
* pointed to by @current->cred and the information set in @bprm->cred by
* the bprm_set_creds hook. @bprm points to the linux_binprm structure.
* This hook is a good place to perform state changes on the process such
* as closing open file descriptors to which access will no longer be
* granted when the attributes are changed. This is called immediately
* before commit_creds().
* @bprm_committed_creds:
* Tidy up after the installation of the new security attributes of a
* process being transformed by an execve operation. The new credentials
* have, by this point, been set to @current->cred. @bprm points to the
* linux_binprm structure. This hook is a good place to perform state
* changes on the process such as clearing out non-inheritable signal
* state. This is called immediately after commit_creds().
* @bprm_secureexec:
* Return a boolean value (0 or 1) indicating whether a "secure exec"
* is required. The flag is passed in the auxiliary table
* on the initial stack to the ELF interpreter to indicate whether libc
* should enable secure mode.
* @bprm contains the linux_binprm structure.
*
* Security hooks for filesystem operations.
*
* @sb_alloc_security:
* Allocate and attach a security structure to the sb->s_security field.
* The s_security field is initialized to NULL when the structure is
* allocated.
* @sb contains the super_block structure to be modified.
* Return 0 if operation was successful.
* @sb_free_security:
* Deallocate and clear the sb->s_security field.
* @sb contains the super_block structure to be modified.
* @sb_statfs:
* Check permission before obtaining filesystem statistics for the @mnt
* mountpoint.
* @dentry is a handle on the superblock for the filesystem.
* Return 0 if permission is granted.
* @sb_mount:
* Check permission before an object specified by @dev_name is mounted on
* the mount point named by @nd. For an ordinary mount, @dev_name
* identifies a device if the file system type requires a device. For a
* remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a
* loopback/bind mount (@flags & MS_BIND), @dev_name identifies the
* pathname of the object being mounted.
* @dev_name contains the name for object being mounted.
* @path contains the path for mount point object.
* @type contains the filesystem type.
* @flags contains the mount flags.
* @data contains the filesystem-specific data.
* Return 0 if permission is granted.
* @sb_copy_data:
* Allow mount option data to be copied prior to parsing by the filesystem,
* so that the security module can extract security-specific mount
* options cleanly (a filesystem may modify the data e.g. with strsep()).
* This also allows the original mount data to be stripped of security-
* specific options to avoid having to make filesystems aware of them.
* @type the type of filesystem being mounted.
* @orig the original mount data copied from userspace.
* @copy copied data which will be passed to the security module.
* Returns 0 if the copy was successful.
* @sb_umount:
* Check permission before the @mnt file system is unmounted.
* @mnt contains the mounted file system.
* @flags contains the unmount flags, e.g. MNT_FORCE.
* Return 0 if permission is granted.
* @sb_pivotroot:
* Check permission before pivoting the root filesystem.
* @old_path contains the path for the new location of the current root (put_old).
* @new_path contains the path for the new root (new_root).
* Return 0 if permission is granted.
Security: add get, set, and cloning of superblock security information Adds security_get_sb_mnt_opts, security_set_sb_mnt_opts, and security_clont_sb_mnt_opts to the LSM and to SELinux. This will allow filesystems to directly own and control all of their mount options if they so choose. This interface deals only with option identifiers and strings so it should generic enough for any LSM which may come in the future. Filesystems which pass text mount data around in the kernel (almost all of them) need not currently make use of this interface when dealing with SELinux since it will still parse those strings as it always has. I assume future LSM's would do the same. NFS is the primary FS which does not use text mount data and thus must make use of this interface. An LSM would need to implement these functions only if they had mount time options, such as selinux has context= or fscontext=. If the LSM has no mount time options they could simply not implement and let the dummy ops take care of things. An LSM other than SELinux would need to define new option numbers in security.h and any FS which decides to own there own security options would need to be patched to use this new interface for every possible LSM. This is because it was stated to me very clearly that LSM's should not attempt to understand FS mount data and the burdon to understand security should be in the FS which owns the options. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2007-12-01 02:00:35 +08:00
* @sb_set_mnt_opts:
* Set the security relevant mount options used for a superblock
* @sb the superblock to set security mount options for
* @opts binary data structure containing all lsm mount data
Security: add get, set, and cloning of superblock security information Adds security_get_sb_mnt_opts, security_set_sb_mnt_opts, and security_clont_sb_mnt_opts to the LSM and to SELinux. This will allow filesystems to directly own and control all of their mount options if they so choose. This interface deals only with option identifiers and strings so it should generic enough for any LSM which may come in the future. Filesystems which pass text mount data around in the kernel (almost all of them) need not currently make use of this interface when dealing with SELinux since it will still parse those strings as it always has. I assume future LSM's would do the same. NFS is the primary FS which does not use text mount data and thus must make use of this interface. An LSM would need to implement these functions only if they had mount time options, such as selinux has context= or fscontext=. If the LSM has no mount time options they could simply not implement and let the dummy ops take care of things. An LSM other than SELinux would need to define new option numbers in security.h and any FS which decides to own there own security options would need to be patched to use this new interface for every possible LSM. This is because it was stated to me very clearly that LSM's should not attempt to understand FS mount data and the burdon to understand security should be in the FS which owns the options. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2007-12-01 02:00:35 +08:00
* @sb_clone_mnt_opts:
* Copy all security options from a given superblock to another
* @oldsb old superblock which contain information to clone
* @newsb new superblock which needs filled in
* @sb_parse_opts_str:
* Parse a string of security data filling in the opts structure
* @options string containing all mount options known by the LSM
* @opts binary data structure usable by the LSM
*
* Security hooks for inode operations.
*
* @inode_alloc_security:
* Allocate and attach a security structure to @inode->i_security. The
* i_security field is initialized to NULL when the inode structure is
* allocated.
* @inode contains the inode structure.
* Return 0 if operation was successful.
* @inode_free_security:
* @inode contains the inode structure.
* Deallocate the inode security structure and set @inode->i_security to
* NULL.
[PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 04:01:35 +08:00
* @inode_init_security:
* Obtain the security attribute name suffix and value to set on a newly
[PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 04:01:35 +08:00
* created inode and set up the incore security field for the new inode.
* This hook is called by the fs code as part of the inode creation
* transaction and provides for atomic labeling of the inode, unlike
* the post_create/mkdir/... hooks called by the VFS. The hook function
* is expected to allocate the name and value via kmalloc, with the caller
* being responsible for calling kfree after using them.
* If the security module does not use security attributes or does
* not wish to put a security attribute on this particular inode,
* then it should return -EOPNOTSUPP to skip this processing.
* @inode contains the inode structure of the newly created inode.
* @dir contains the inode structure of the parent directory.
* @name will be set to the allocated name suffix (e.g. selinux).
* @value will be set to the allocated attribute value.
* @len will be set to the length of the value.
* Returns 0 if @name and @value have been successfully set,
* -EOPNOTSUPP if no security attribute is needed, or
* -ENOMEM on memory allocation failure.
* @inode_create:
* Check permission to create a regular file.
* @dir contains inode structure of the parent of the new file.
* @dentry contains the dentry structure for the file to be created.
* @mode contains the file mode of the file to be created.
* Return 0 if permission is granted.
* @inode_link:
* Check permission before creating a new hard link to a file.
* @old_dentry contains the dentry structure for an existing link to the file.
* @dir contains the inode structure of the parent directory of the new link.
* @new_dentry contains the dentry structure for the new link.
* Return 0 if permission is granted.
* @path_link:
* Check permission before creating a new hard link to a file.
* @old_dentry contains the dentry structure for an existing link
* to the file.
* @new_dir contains the path structure of the parent directory of
* the new link.
* @new_dentry contains the dentry structure for the new link.
* Return 0 if permission is granted.
* @inode_unlink:
* Check the permission to remove a hard link to a file.
* @dir contains the inode structure of parent directory of the file.
* @dentry contains the dentry structure for file to be unlinked.
* Return 0 if permission is granted.
* @path_unlink:
* Check the permission to remove a hard link to a file.
* @dir contains the path structure of parent directory of the file.
* @dentry contains the dentry structure for file to be unlinked.
* Return 0 if permission is granted.
* @inode_symlink:
* Check the permission to create a symbolic link to a file.
* @dir contains the inode structure of parent directory of the symbolic link.
* @dentry contains the dentry structure of the symbolic link.
* @old_name contains the pathname of file.
* Return 0 if permission is granted.
* @path_symlink:
* Check the permission to create a symbolic link to a file.
* @dir contains the path structure of parent directory of
* the symbolic link.
* @dentry contains the dentry structure of the symbolic link.
* @old_name contains the pathname of file.
* Return 0 if permission is granted.
* @inode_mkdir:
* Check permissions to create a new directory in the existing directory
* associated with inode strcture @dir.
* @dir containst the inode structure of parent of the directory to be created.
* @dentry contains the dentry structure of new directory.
* @mode contains the mode of new directory.
* Return 0 if permission is granted.
* @path_mkdir:
* Check permissions to create a new directory in the existing directory
* associated with path strcture @path.
* @dir containst the path structure of parent of the directory
* to be created.
* @dentry contains the dentry structure of new directory.
* @mode contains the mode of new directory.
* Return 0 if permission is granted.
* @inode_rmdir:
* Check the permission to remove a directory.
* @dir contains the inode structure of parent of the directory to be removed.
* @dentry contains the dentry structure of directory to be removed.
* Return 0 if permission is granted.
* @path_rmdir:
* Check the permission to remove a directory.
* @dir contains the path structure of parent of the directory to be
* removed.
* @dentry contains the dentry structure of directory to be removed.
* Return 0 if permission is granted.
* @inode_mknod:
* Check permissions when creating a special file (or a socket or a fifo
* file created via the mknod system call). Note that if mknod operation
* is being done for a regular file, then the create hook will be called
* and not this hook.
* @dir contains the inode structure of parent of the new file.
* @dentry contains the dentry structure of the new file.
* @mode contains the mode of the new file.
* @dev contains the device number.
* Return 0 if permission is granted.
* @path_mknod:
* Check permissions when creating a file. Note that this hook is called
* even if mknod operation is being done for a regular file.
* @dir contains the path structure of parent of the new file.
* @dentry contains the dentry structure of the new file.
* @mode contains the mode of the new file.
* @dev contains the undecoded device number. Use new_decode_dev() to get
* the decoded device number.
* Return 0 if permission is granted.
* @inode_rename:
* Check for permission to rename a file or directory.
* @old_dir contains the inode structure for parent of the old link.
* @old_dentry contains the dentry structure of the old link.
* @new_dir contains the inode structure for parent of the new link.
* @new_dentry contains the dentry structure of the new link.
* Return 0 if permission is granted.
* @path_rename:
* Check for permission to rename a file or directory.
* @old_dir contains the path structure for parent of the old link.
* @old_dentry contains the dentry structure of the old link.
* @new_dir contains the path structure for parent of the new link.
* @new_dentry contains the dentry structure of the new link.
* Return 0 if permission is granted.
* @path_chmod:
* Check for permission to change DAC's permission of a file or directory.
* @dentry contains the dentry structure.
* @mnt contains the vfsmnt structure.
* @mode contains DAC's mode.
* Return 0 if permission is granted.
* @path_chown:
* Check for permission to change owner/group of a file or directory.
* @path contains the path structure.
* @uid contains new owner's ID.
* @gid contains new group's ID.
* Return 0 if permission is granted.
* @path_chroot:
* Check for permission to change root directory.
* @path contains the path structure.
* Return 0 if permission is granted.
* @inode_readlink:
* Check the permission to read the symbolic link.
* @dentry contains the dentry structure for the file link.
* Return 0 if permission is granted.
* @inode_follow_link:
* Check permission to follow a symbolic link when looking up a pathname.
* @dentry contains the dentry structure for the link.
* @nd contains the nameidata structure for the parent directory.
* Return 0 if permission is granted.
* @inode_permission:
* Check permission before accessing an inode. This hook is called by the
* existing Linux permission function, so a security module can use it to
* provide additional checking for existing Linux permission checks.
* Notice that this hook is called when a file is opened (as well as many
* other operations), whereas the file_security_ops permission hook is
* called when the actual read/write operations are performed.
* @inode contains the inode structure to check.
* @mask contains the permission mask.
* Return 0 if permission is granted.
* @inode_setattr:
* Check permission before setting file attributes. Note that the kernel
* call to notify_change is performed from several locations, whenever
* file attributes change (such as when a file is truncated, chown/chmod
* operations, transferring disk quotas, etc).
* @dentry contains the dentry structure for the file.
* @attr is the iattr structure containing the new file attributes.
* Return 0 if permission is granted.
* @path_truncate:
* Check permission before truncating a file.
* @path contains the path structure for the file.
* Return 0 if permission is granted.
* @inode_getattr:
* Check permission before obtaining file attributes.
* @mnt is the vfsmount where the dentry was looked up
* @dentry contains the dentry structure for the file.
* Return 0 if permission is granted.
* @inode_setxattr:
* Check permission before setting the extended attributes
* @value identified by @name for @dentry.
* Return 0 if permission is granted.
* @inode_post_setxattr:
* Update inode security field after successful setxattr operation.
* @value identified by @name for @dentry.
* @inode_getxattr:
* Check permission before obtaining the extended attributes
* identified by @name for @dentry.
* Return 0 if permission is granted.
* @inode_listxattr:
* Check permission before obtaining the list of extended attribute
* names for @dentry.
* Return 0 if permission is granted.
* @inode_removexattr:
* Check permission before removing the extended attribute
* identified by @name for @dentry.
* Return 0 if permission is granted.
* @inode_getsecurity:
* Retrieve a copy of the extended attribute representation of the
* security label associated with @name for @inode via @buffer. Note that
* @name is the remainder of the attribute name after the security prefix
* has been removed. @alloc is used to specify of the call should return a
* value via the buffer or just the value length Return size of buffer on
* success.
* @inode_setsecurity:
* Set the security label associated with @name for @inode from the
* extended attribute value @value. @size indicates the size of the
* @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0.
* Note that @name is the remainder of the attribute name after the
* security. prefix has been removed.
* Return 0 on success.
* @inode_listsecurity:
* Copy the extended attribute names for the security labels
* associated with @inode into @buffer. The maximum size of @buffer
* is specified by @buffer_size. @buffer may be NULL to request
* the size of the buffer required.
* Returns number of bytes used/required on success.
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
* @inode_need_killpriv:
* Called when an inode has been changed.
* @dentry is the dentry being changed.
* Return <0 on error to abort the inode change operation.
* Return 0 if inode_killpriv does not need to be called.
* Return >0 if inode_killpriv does need to be called.
* @inode_killpriv:
* The setuid bit is being removed. Remove similar security labels.
* Called with the dentry->d_inode->i_mutex held.
* @dentry is the dentry being changed.
* Return 0 on success. If error is returned, then the operation
* causing setuid bit removal is failed.
* @inode_getsecid:
* Get the secid associated with the node.
* @inode contains a pointer to the inode.
* @secid contains a pointer to the location where result will be saved.
* In case of failure, @secid will be set to zero.
*
* Security hooks for file operations
*
* @file_permission:
* Check file permissions before accessing an open file. This hook is
* called by various operations that read or write files. A security
* module can use this hook to perform additional checking on these
* operations, e.g. to revalidate permissions on use to support privilege
* bracketing or policy changes. Notice that this hook is used when the
* actual read/write operations are performed, whereas the
* inode_security_ops hook is called when a file is opened (as well as
* many other operations).
* Caveat: Although this hook can be used to revalidate permissions for
* various system call operations that read or write files, it does not
* address the revalidation of permissions for memory-mapped files.
* Security modules must handle this separately if they need such
* revalidation.
* @file contains the file structure being accessed.
* @mask contains the requested permissions.
* Return 0 if permission is granted.
* @file_alloc_security:
* Allocate and attach a security structure to the file->f_security field.
* The security field is initialized to NULL when the structure is first
* created.
* @file contains the file structure to secure.
* Return 0 if the hook is successful and permission is granted.
* @file_free_security:
* Deallocate and free any security structures stored in file->f_security.
* @file contains the file structure being modified.
* @file_ioctl:
* @file contains the file structure.
* @cmd contains the operation to perform.
* @arg contains the operational arguments.
* Check permission for an ioctl operation on @file. Note that @arg can
* sometimes represents a user space pointer; in other cases, it may be a
* simple integer value. When @arg represents a user space pointer, it
* should never be used by the security module.
* Return 0 if permission is granted.
* @file_mmap :
* Check permissions for a mmap operation. The @file may be NULL, e.g.
* if mapping anonymous memory.
* @file contains the file structure for file to map (may be NULL).
* @reqprot contains the protection requested by the application.
* @prot contains the protection that will be applied by the kernel.
* @flags contains the operational flags.
* Return 0 if permission is granted.
* @file_mprotect:
* Check permissions before changing memory access permissions.
* @vma contains the memory region to modify.
* @reqprot contains the protection requested by the application.
* @prot contains the protection that will be applied by the kernel.
* Return 0 if permission is granted.
* @file_lock:
* Check permission before performing file locking operations.
* Note: this hook mediates both flock and fcntl style locks.
* @file contains the file structure.
* @cmd contains the posix-translated lock operation to perform
* (e.g. F_RDLCK, F_WRLCK).
* Return 0 if permission is granted.
* @file_fcntl:
* Check permission before allowing the file operation specified by @cmd
* from being performed on the file @file. Note that @arg can sometimes
* represents a user space pointer; in other cases, it may be a simple
* integer value. When @arg represents a user space pointer, it should
* never be used by the security module.
* @file contains the file structure.
* @cmd contains the operation to be performed.
* @arg contains the operational arguments.
* Return 0 if permission is granted.
* @file_set_fowner:
* Save owner security information (typically from current->security) in
* file->f_security for later use by the send_sigiotask hook.
* @file contains the file structure to update.
* Return 0 on success.
* @file_send_sigiotask:
* Check permission for the file owner @fown to send SIGIO or SIGURG to the
* process @tsk. Note that this hook is sometimes called from interrupt.
* Note that the fown_struct, @fown, is never outside the context of a
* struct file, so the file structure (and associated security information)
* can always be obtained:
* container_of(fown, struct file, f_owner)
* @tsk contains the structure of task receiving signal.
* @fown contains the file owner information.
* @sig is the signal that will be sent. When 0, kernel sends SIGIO.
* Return 0 if permission is granted.
* @file_receive:
* This hook allows security modules to control the ability of a process
* to receive an open file descriptor via socket IPC.
* @file contains the file structure being received.
* Return 0 if permission is granted.
*
* Security hook for dentry
*
* @dentry_open
* Save open-time permission checking state for later use upon
* file_permission, and recheck access if anything has changed
* since inode_permission.
*
* Security hooks for task operations.
*
* @task_create:
* Check permission before creating a child process. See the clone(2)
* manual page for definitions of the @clone_flags.
* @clone_flags contains the flags indicating what should be shared.
* Return 0 if permission is granted.
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
* @cred_alloc_blank:
* @cred points to the credentials.
* @gfp indicates the atomicity of any memory allocations.
* Only allocate sufficient memory and attach to @cred such that
* cred_transfer() will not get ENOMEM.
* @cred_free:
* @cred points to the credentials.
* Deallocate and clear the cred->security field in a set of credentials.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @cred_prepare:
* @new points to the new credentials.
* @old points to the original credentials.
* @gfp indicates the atomicity of any memory allocations.
* Prepare a new set of credentials by copying the data from the old set.
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
* @cred_transfer:
* @new points to the new credentials.
* @old points to the original credentials.
* Transfer data from original creds to new creds
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
* @kernel_act_as:
* Set the credentials for a kernel service to act as (subjective context).
* @new points to the credentials to be modified.
* @secid specifies the security ID to be set
* The current task must be the one that nominated @secid.
* Return 0 if successful.
* @kernel_create_files_as:
* Set the file creation context in a set of credentials to be the same as
* the objective context of the specified inode.
* @new points to the credentials to be modified.
* @inode points to the inode to use as a reference.
* The current task must be the one that nominated @inode.
* Return 0 if successful.
* @kernel_module_request:
* Ability to trigger the kernel to automatically upcall to userspace for
* userspace to load a kernel module with the given name.
* @kmod_name name of the module requested by the kernel
* Return 0 if successful.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @task_fix_setuid:
* Update the module's state after setting one or more of the user
* identity attributes of the current process. The @flags parameter
* indicates which of the set*uid system calls invoked this hook. If
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @new is the set of credentials that will be installed. Modifications
* should be made to this rather than to @current->cred.
* @old is the set of credentials that are being replaces
* @flags contains one of the LSM_SETID_* values.
* Return 0 on success.
* @task_setpgid:
* Check permission before setting the process group identifier of the
* process @p to @pgid.
* @p contains the task_struct for process being modified.
* @pgid contains the new pgid.
* Return 0 if permission is granted.
* @task_getpgid:
* Check permission before getting the process group identifier of the
* process @p.
* @p contains the task_struct for the process.
* Return 0 if permission is granted.
* @task_getsid:
* Check permission before getting the session identifier of the process
* @p.
* @p contains the task_struct for the process.
* Return 0 if permission is granted.
* @task_getsecid:
* Retrieve the security identifier of the process @p.
* @p contains the task_struct for the process and place is into @secid.
* In case of failure, @secid will be set to zero.
*
* @task_setnice:
* Check permission before setting the nice value of @p to @nice.
* @p contains the task_struct of process.
* @nice contains the new nice value.
* Return 0 if permission is granted.
* @task_setioprio
* Check permission before setting the ioprio value of @p to @ioprio.
* @p contains the task_struct of process.
* @ioprio contains the new ioprio value
* Return 0 if permission is granted.
* @task_getioprio
* Check permission before getting the ioprio value of @p.
* @p contains the task_struct of process.
* Return 0 if permission is granted.
* @task_setrlimit:
* Check permission before setting the resource limits of the current
* process for @resource to @new_rlim. The old resource limit values can
* be examined by dereferencing (current->signal->rlim + resource).
* @resource contains the resource whose limit is being set.
* @new_rlim contains the new limits for @resource.
* Return 0 if permission is granted.
* @task_setscheduler:
* Check permission before setting scheduling policy and/or parameters of
* process @p based on @policy and @lp.
* @p contains the task_struct for process.
* @policy contains the scheduling policy.
* @lp contains the scheduling parameters.
* Return 0 if permission is granted.
* @task_getscheduler:
* Check permission before obtaining scheduling information for process
* @p.
* @p contains the task_struct for process.
* Return 0 if permission is granted.
* @task_movememory
* Check permission before moving memory owned by process @p.
* @p contains the task_struct for process.
* Return 0 if permission is granted.
* @task_kill:
* Check permission before sending signal @sig to @p. @info can be NULL,
* the constant 1, or a pointer to a siginfo structure. If @info is 1 or
* SI_FROMKERNEL(info) is true, then the signal should be viewed as coming
* from the kernel and should typically be permitted.
* SIGIO signals are handled separately by the send_sigiotask hook in
* file_security_ops.
* @p contains the task_struct for process.
* @info contains the signal information.
* @sig contains the signal value.
* @secid contains the sid of the process where the signal originated
* Return 0 if permission is granted.
* @task_wait:
* Check permission before allowing a process to reap a child process @p
* and collect its status information.
* @p contains the task_struct for process.
* Return 0 if permission is granted.
* @task_prctl:
* Check permission before performing a process control operation on the
* current process.
* @option contains the operation.
* @arg2 contains a argument.
* @arg3 contains a argument.
* @arg4 contains a argument.
* @arg5 contains a argument.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* Return -ENOSYS if no-one wanted to handle this op, any other value to
* cause prctl() to return immediately with that value.
* @task_to_inode:
* Set the security attributes for an inode based on an associated task's
* security attributes, e.g. for /proc/pid inodes.
* @p contains the task_struct for the task.
* @inode contains the inode structure for the inode.
*
* Security hooks for Netlink messaging.
*
* @netlink_send:
* Save security information for a netlink message so that permission
* checking can be performed when the message is processed. The security
* information can be saved using the eff_cap field of the
* netlink_skb_parms structure. Also may be used to provide fine
* grained control over message transmission.
* @sk associated sock of task sending the message.,
* @skb contains the sk_buff structure for the netlink message.
* Return 0 if the information was successfully saved and message
* is allowed to be transmitted.
* @netlink_recv:
* Check permission before processing the received netlink message in
* @skb.
* @skb contains the sk_buff structure for the netlink message.
* @cap indicates the capability required
* Return 0 if permission is granted.
*
* Security hooks for Unix domain networking.
*
* @unix_stream_connect:
* Check permissions before establishing a Unix domain stream connection
* between @sock and @other.
* @sock contains the sock structure.
* @other contains the peer sock structure.
* @newsk contains the new sock structure.
* Return 0 if permission is granted.
* @unix_may_send:
* Check permissions before connecting or sending datagrams from @sock to
* @other.
* @sock contains the socket structure.
* @sock contains the peer socket structure.
* Return 0 if permission is granted.
*
* The @unix_stream_connect and @unix_may_send hooks were necessary because
* Linux provides an alternative to the conventional file name space for Unix
* domain sockets. Whereas binding and connecting to sockets in the file name
* space is mediated by the typical file permissions (and caught by the mknod
* and permission hooks in inode_security_ops), binding and connecting to
* sockets in the abstract name space is completely unmediated. Sufficient
* control of Unix domain sockets in the abstract name space isn't possible
* using only the socket layer hooks, since we need to know the actual target
* socket, which is not looked up until we are inside the af_unix code.
*
* Security hooks for socket operations.
*
* @socket_create:
* Check permissions prior to creating a new socket.
* @family contains the requested protocol family.
* @type contains the requested communications type.
* @protocol contains the requested protocol.
* @kern set to 1 if a kernel socket.
* Return 0 if permission is granted.
* @socket_post_create:
* This hook allows a module to update or allocate a per-socket security
* structure. Note that the security field was not added directly to the
* socket structure, but rather, the socket security information is stored
* in the associated inode. Typically, the inode alloc_security hook will
* allocate and and attach security information to
* sock->inode->i_security. This hook may be used to update the
* sock->inode->i_security field with additional information that wasn't
* available when the inode was allocated.
* @sock contains the newly created socket structure.
* @family contains the requested protocol family.
* @type contains the requested communications type.
* @protocol contains the requested protocol.
* @kern set to 1 if a kernel socket.
* @socket_bind:
* Check permission before socket protocol layer bind operation is
* performed and the socket @sock is bound to the address specified in the
* @address parameter.
* @sock contains the socket structure.
* @address contains the address to bind to.
* @addrlen contains the length of address.
* Return 0 if permission is granted.
* @socket_connect:
* Check permission before socket protocol layer connect operation
* attempts to connect socket @sock to a remote address, @address.
* @sock contains the socket structure.
* @address contains the address of remote endpoint.
* @addrlen contains the length of address.
* Return 0 if permission is granted.
* @socket_listen:
* Check permission before socket protocol layer listen operation.
* @sock contains the socket structure.
* @backlog contains the maximum length for the pending connection queue.
* Return 0 if permission is granted.
* @socket_accept:
* Check permission before accepting a new connection. Note that the new
* socket, @newsock, has been created and some information copied to it,
* but the accept operation has not actually been performed.
* @sock contains the listening socket structure.
* @newsock contains the newly created server socket for connection.
* Return 0 if permission is granted.
* @socket_sendmsg:
* Check permission before transmitting a message to another socket.
* @sock contains the socket structure.
* @msg contains the message to be transmitted.
* @size contains the size of message.
* Return 0 if permission is granted.
* @socket_recvmsg:
* Check permission before receiving a message from a socket.
* @sock contains the socket structure.
* @msg contains the message structure.
* @size contains the size of message structure.
* @flags contains the operational flags.
* Return 0 if permission is granted.
* @socket_getsockname:
* Check permission before the local address (name) of the socket object
* @sock is retrieved.
* @sock contains the socket structure.
* Return 0 if permission is granted.
* @socket_getpeername:
* Check permission before the remote address (name) of a socket object
* @sock is retrieved.
* @sock contains the socket structure.
* Return 0 if permission is granted.
* @socket_getsockopt:
* Check permissions before retrieving the options associated with socket
* @sock.
* @sock contains the socket structure.
* @level contains the protocol level to retrieve option from.
* @optname contains the name of option to retrieve.
* Return 0 if permission is granted.
* @socket_setsockopt:
* Check permissions before setting the options associated with socket
* @sock.
* @sock contains the socket structure.
* @level contains the protocol level to set options for.
* @optname contains the name of the option to set.
* Return 0 if permission is granted.
* @socket_shutdown:
* Checks permission before all or part of a connection on the socket
* @sock is shut down.
* @sock contains the socket structure.
* @how contains the flag indicating how future sends and receives are handled.
* Return 0 if permission is granted.
* @socket_sock_rcv_skb:
* Check permissions on incoming network packets. This hook is distinct
* from Netfilter's IP input hooks since it is the first time that the
* incoming sk_buff @skb has been associated with a particular socket, @sk.
* Must not sleep inside this hook because some callers hold spinlocks.
* @sk contains the sock (not socket) associated with the incoming sk_buff.
* @skb contains the incoming network data.
* @socket_getpeersec_stream:
* This hook allows the security module to provide peer socket security
* state for unix or connected tcp sockets to userspace via getsockopt
* SO_GETPEERSEC. For tcp sockets this can be meaningful if the
* socket is associated with an ipsec SA.
* @sock is the local socket.
* @optval userspace memory where the security state is to be copied.
* @optlen userspace int where the module should copy the actual length
* of the security state.
* @len as input is the maximum length to copy to userspace provided
* by the caller.
* Return 0 if all is well, otherwise, typical getsockopt return
* values.
* @socket_getpeersec_dgram:
* This hook allows the security module to provide peer socket security
* state for udp sockets on a per-packet basis to userspace via
* getsockopt SO_GETPEERSEC. The application must first have indicated
* the IP_PASSSEC option via getsockopt. It can then retrieve the
* security state returned by this hook for a packet via the SCM_SECURITY
* ancillary message type.
* @skb is the skbuff for the packet being queried
* @secdata is a pointer to a buffer in which to copy the security data
* @seclen is the maximum length for @secdata
* Return 0 on success, error on failure.
* @sk_alloc_security:
* Allocate and attach a security structure to the sk->sk_security field,
* which is used to copy security attributes between local stream sockets.
* @sk_free_security:
* Deallocate security structure.
* @sk_clone_security:
* Clone/copy security structure.
* @sk_getsecid:
* Retrieve the LSM-specific secid for the sock to enable caching of network
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* authorizations.
* @sock_graft:
* Sets the socket's isec sid to the sock's sid.
* @inet_conn_request:
* Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
* @inet_csk_clone:
* Sets the new child socket's sid to the openreq sid.
* @inet_conn_established:
* Sets the connection's peersid to the secmark on skb.
* @secmark_relabel_packet:
* check if the process should be allowed to relabel packets to the given secid
* @security_secmark_refcount_inc
* tells the LSM to increment the number of secmark labeling rules loaded
* @security_secmark_refcount_dec
* tells the LSM to decrement the number of secmark labeling rules loaded
* @req_classify_flow:
* Sets the flow's sid to the openreq sid.
* @tun_dev_create:
* Check permissions prior to creating a new TUN device.
* @tun_dev_post_create:
* This hook allows a module to update or allocate a per-socket security
* structure.
* @sk contains the newly created sock structure.
* @tun_dev_attach:
* Check permissions prior to attaching to a persistent TUN device. This
* hook can also be used by the module to update any security state
* associated with the TUN device's sock structure.
* @sk contains the existing sock structure.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
*
* Security hooks for XFRM operations.
*
* @xfrm_policy_alloc_security:
* @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy
* Database used by the XFRM system.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* @sec_ctx contains the security context information being provided by
* the user-level policy update program (e.g., setkey).
* Allocate a security structure to the xp->security field; the security
SELinux: Various xfrm labeling fixes Since the upstreaming of the mlsxfrm modification a few months back, testing has resulted in the identification of the following issues/bugs that are resolved in this patch set. 1. Fix the security context used in the IKE negotiation to be the context of the socket as opposed to the context of the SPD rule. 2. Fix SO_PEERSEC for tcp sockets to return the security context of the peer as opposed to the source. 3. Fix the selection of an SA for an outgoing packet to be at the same context as the originating socket/flow. The following would be the result of applying this patchset: - SO_PEERSEC will now correctly return the peer's context. - IKE deamons will receive the context of the source socket/flow as opposed to the SPD rule's context so that the negotiated SA will be at the same context as the source socket/flow. - The SELinux policy will require one or more of the following for a socket to be able to communicate with/without SAs: 1. To enable a socket to communicate without using labeled-IPSec SAs: allow socket_t unlabeled_t:association { sendto recvfrom } 2. To enable a socket to communicate with labeled-IPSec SAs: allow socket_t self:association { sendto }; allow socket_t peer_sa_t:association { recvfrom }; This Patch: Pass correct security context to IKE for use in negotiation Fix the security context passed to IKE for use in negotiation to be the context of the socket as opposed to the context of the SPD rule so that the SA carries the label of the originating socket/flow. Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com> Signed-off-by: James Morris <jmorris@namei.org>
2006-11-09 07:03:44 +08:00
* field is initialized to NULL when the xfrm_policy is allocated.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* Return 0 if operation was successful (memory to allocate, legal context)
* @xfrm_policy_clone_security:
* @old_ctx contains an existing xfrm_sec_ctx.
* @new_ctxp contains a new xfrm_sec_ctx being cloned from old.
* Allocate a security structure in new_ctxp that contains the
* information from the old_ctx structure.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* Return 0 if operation was successful (memory to allocate).
* @xfrm_policy_free_security:
* @ctx contains the xfrm_sec_ctx
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
* Deallocate xp->security.
* @xfrm_policy_delete_security:
* @ctx contains the xfrm_sec_ctx.
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
* Authorize deletion of xp->security.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* @xfrm_state_alloc_security:
* @x contains the xfrm_state being added to the Security Association
* Database by the XFRM system.
* @sec_ctx contains the security context information being provided by
* the user-level SA generation program (e.g., setkey or racoon).
* @secid contains the secid from which to take the mls portion of the context.
* Allocate a security structure to the x->security field; the security
* field is initialized to NULL when the xfrm_state is allocated. Set the
* context to correspond to either sec_ctx or polsec, with the mls portion
* taken from secid in the latter case.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* Return 0 if operation was successful (memory to allocate, legal context).
* @xfrm_state_free_security:
* @x contains the xfrm_state.
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
* Deallocate x->security.
* @xfrm_state_delete_security:
* @x contains the xfrm_state.
* Authorize deletion of x->security.
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* @xfrm_policy_lookup:
* @ctx contains the xfrm_sec_ctx for which the access control is being
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* checked.
* @fl_secid contains the flow security label that is used to authorize
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* access to the policy xp.
* @dir contains the direction of the flow (input or output).
* Check permission when a flow selects a xfrm_policy for processing
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
* XFRMs on a packet. The hook is called when selecting either a
* per-socket policy or a generic xfrm policy.
IPsec: correct semantics for SELinux policy matching Currently when an IPSec policy rule doesn't specify a security context, it is assumed to be "unlabeled" by SELinux, and so the IPSec policy rule fails to match to a flow that it would otherwise match to, unless one has explicitly added an SELinux policy rule allowing the flow to "polmatch" to the "unlabeled" IPSec policy rules. In the absence of such an explicitly added SELinux policy rule, the IPSec policy rule fails to match and so the packet(s) flow in clear text without the otherwise applicable xfrm(s) applied. The above SELinux behavior violates the SELinux security notion of "deny by default" which should actually translate to "encrypt by default" in the above case. This was first reported by Evgeniy Polyakov and the way James Morris was seeing the problem was when connecting via IPsec to a confined service on an SELinux box (vsftpd), which did not have the appropriate SELinux policy permissions to send packets via IPsec. With this patch applied, SELinux "polmatching" of flows Vs. IPSec policy rules will only come into play when there's a explicit context specified for the IPSec policy rule (which also means there's corresponding SELinux policy allowing appropriate domains/flows to polmatch to this context). Secondly, when a security module is loaded (in this case, SELinux), the security_xfrm_policy_lookup() hook can return errors other than access denied, such as -EINVAL. We were not handling that correctly, and in fact inverting the return logic and propagating a false "ok" back up to xfrm_lookup(), which then allowed packets to pass as if they were not associated with an xfrm policy. The solution for this is to first ensure that errno values are correctly propagated all the way back up through the various call chains from security_xfrm_policy_lookup(), and handled correctly. Then, flow_cache_lookup() is modified, so that if the policy resolver fails (typically a permission denied via the security module), the flow cache entry is killed rather than having a null policy assigned (which indicates that the packet can pass freely). This also forces any future lookups for the same flow to consult the security module (e.g. SELinux) for current security policy (rather than, say, caching the error on the flow cache entry). This patch: Fix the selinux side of things. This makes sure SELinux polmatching of flow contexts to IPSec policy rules comes into play only when an explicit context is associated with the IPSec policy rule. Also, this no longer defaults the context of a socket policy to the context of the socket since the "no explicit context" case is now handled properly. Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com> Signed-off-by: James Morris <jmorris@namei.org>
2006-10-06 04:42:18 +08:00
* Return 0 if permission is granted, -ESRCH otherwise, or -errno
* on other errors.
* @xfrm_state_pol_flow_match:
* @x contains the state to match.
* @xp contains the policy to check for a match.
* @fl contains the flow to check for a match.
* Return 1 if there is a match.
* @xfrm_decode_session:
* @skb points to skb to decode.
* @secid points to the flow key secid to set.
* @ckall says if all xfrms used should be checked for same secid.
* Return 0 if ckall is zero or all xfrms used have the same secid.
*
* Security hooks affecting all Key Management operations
*
* @key_alloc:
* Permit allocation of a key and assign security data. Note that key does
* not have a serial number assigned at this point.
* @key points to the key.
* @flags is the allocation flags
* Return 0 if permission is granted, -ve error otherwise.
* @key_free:
* Notification of destruction; free security data.
* @key points to the key.
* No return value.
* @key_permission:
* See whether a specific operational right is granted to a process on a
* key.
* @key_ref refers to the key (key pointer + possession attribute bit).
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @cred points to the credentials to provide the context against which to
* evaluate the security data on the key.
* @perm describes the combination of permissions required of this key.
* Return 0 if permission is granted, -ve error otherwise.
* @key_getsecurity:
* Get a textual representation of the security context attached to a key
* for the purposes of honouring KEYCTL_GETSECURITY. This function
* allocates the storage for the NUL-terminated string and the caller
* should free it.
* @key points to the key to be queried.
* @_buffer points to a pointer that should be set to point to the
* resulting string (if no label or an error occurs).
* Return the length of the string (including terminating NUL) or -ve if
* an error.
* May also return 0 (and a NULL buffer pointer) if there is no label.
*
* Security hooks affecting all System V IPC operations.
*
* @ipc_permission:
* Check permissions for access to IPC
* @ipcp contains the kernel IPC permission structure
* @flag contains the desired (requested) permission set
* Return 0 if permission is granted.
* @ipc_getsecid:
* Get the secid associated with the ipc object.
* @ipcp contains the kernel IPC permission structure.
* @secid contains a pointer to the location where result will be saved.
* In case of failure, @secid will be set to zero.
*
* Security hooks for individual messages held in System V IPC message queues
* @msg_msg_alloc_security:
* Allocate and attach a security structure to the msg->security field.
* The security field is initialized to NULL when the structure is first
* created.
* @msg contains the message structure to be modified.
* Return 0 if operation was successful and permission is granted.
* @msg_msg_free_security:
* Deallocate the security structure for this message.
* @msg contains the message structure to be modified.
*
* Security hooks for System V IPC Message Queues
*
* @msg_queue_alloc_security:
* Allocate and attach a security structure to the
* msq->q_perm.security field. The security field is initialized to
* NULL when the structure is first created.
* @msq contains the message queue structure to be modified.
* Return 0 if operation was successful and permission is granted.
* @msg_queue_free_security:
* Deallocate security structure for this message queue.
* @msq contains the message queue structure to be modified.
* @msg_queue_associate:
* Check permission when a message queue is requested through the
* msgget system call. This hook is only called when returning the
* message queue identifier for an existing message queue, not when a
* new message queue is created.
* @msq contains the message queue to act upon.
* @msqflg contains the operation control flags.
* Return 0 if permission is granted.
* @msg_queue_msgctl:
* Check permission when a message control operation specified by @cmd
* is to be performed on the message queue @msq.
* The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO.
* @msq contains the message queue to act upon. May be NULL.
* @cmd contains the operation to be performed.
* Return 0 if permission is granted.
* @msg_queue_msgsnd:
* Check permission before a message, @msg, is enqueued on the message
* queue, @msq.
* @msq contains the message queue to send message to.
* @msg contains the message to be enqueued.
* @msqflg contains operational flags.
* Return 0 if permission is granted.
* @msg_queue_msgrcv:
* Check permission before a message, @msg, is removed from the message
* queue, @msq. The @target task structure contains a pointer to the
* process that will be receiving the message (not equal to the current
* process when inline receives are being performed).
* @msq contains the message queue to retrieve message from.
* @msg contains the message destination.
* @target contains the task structure for recipient process.
* @type contains the type of message requested.
* @mode contains the operational flags.
* Return 0 if permission is granted.
*
* Security hooks for System V Shared Memory Segments
*
* @shm_alloc_security:
* Allocate and attach a security structure to the shp->shm_perm.security
* field. The security field is initialized to NULL when the structure is
* first created.
* @shp contains the shared memory structure to be modified.
* Return 0 if operation was successful and permission is granted.
* @shm_free_security:
* Deallocate the security struct for this memory segment.
* @shp contains the shared memory structure to be modified.
* @shm_associate:
* Check permission when a shared memory region is requested through the
* shmget system call. This hook is only called when returning the shared
* memory region identifier for an existing region, not when a new shared
* memory region is created.
* @shp contains the shared memory structure to be modified.
* @shmflg contains the operation control flags.
* Return 0 if permission is granted.
* @shm_shmctl:
* Check permission when a shared memory control operation specified by
* @cmd is to be performed on the shared memory region @shp.
* The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO.
* @shp contains shared memory structure to be modified.
* @cmd contains the operation to be performed.
* Return 0 if permission is granted.
* @shm_shmat:
* Check permissions prior to allowing the shmat system call to attach the
* shared memory segment @shp to the data segment of the calling process.
* The attaching address is specified by @shmaddr.
* @shp contains the shared memory structure to be modified.
* @shmaddr contains the address to attach memory region to.
* @shmflg contains the operational flags.
* Return 0 if permission is granted.
*
* Security hooks for System V Semaphores
*
* @sem_alloc_security:
* Allocate and attach a security structure to the sma->sem_perm.security
* field. The security field is initialized to NULL when the structure is
* first created.
* @sma contains the semaphore structure
* Return 0 if operation was successful and permission is granted.
* @sem_free_security:
* deallocate security struct for this semaphore
* @sma contains the semaphore structure.
* @sem_associate:
* Check permission when a semaphore is requested through the semget
* system call. This hook is only called when returning the semaphore
* identifier for an existing semaphore, not when a new one must be
* created.
* @sma contains the semaphore structure.
* @semflg contains the operation control flags.
* Return 0 if permission is granted.
* @sem_semctl:
* Check permission when a semaphore operation specified by @cmd is to be
* performed on the semaphore @sma. The @sma may be NULL, e.g. for
* IPC_INFO or SEM_INFO.
* @sma contains the semaphore structure. May be NULL.
* @cmd contains the operation to be performed.
* Return 0 if permission is granted.
* @sem_semop
* Check permissions before performing operations on members of the
* semaphore set @sma. If the @alter flag is nonzero, the semaphore set
* may be modified.
* @sma contains the semaphore structure.
* @sops contains the operations to perform.
* @nsops contains the number of operations to perform.
* @alter contains the flag indicating whether changes are to be made.
* Return 0 if permission is granted.
*
* @ptrace_access_check:
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
* Check permission before allowing the current process to trace the
* @child process.
* Security modules may also want to perform a process tracing check
* during an execve in the set_security or apply_creds hooks of
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* tracing check during an execve in the bprm_set_creds hook of
* binprm_security_ops if the process is being traced and its security
* attributes would be changed by the execve.
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
* @child contains the task_struct structure for the target process.
Security: split proc ptrace checking into read vs. attach Enable security modules to distinguish reading of process state via proc from full ptrace access by renaming ptrace_may_attach to ptrace_may_access and adding a mode argument indicating whether only read access or full attach access is requested. This allows security modules to permit access to reading process state without granting full ptrace access. The base DAC/capability checking remains unchanged. Read access to /proc/pid/mem continues to apply a full ptrace attach check since check_mem_permission() already requires the current task to already be ptracing the target. The other ptrace checks within proc for elements like environ, maps, and fds are changed to pass the read mode instead of attach. In the SELinux case, we model such reading of process state as a reading of a proc file labeled with the target process' label. This enables SELinux policy to permit such reading of process state without permitting control or manipulation of the target process, as there are a number of cases where programs probe for such information via proc but do not need to be able to control the target (e.g. procps, lsof, PolicyKit, ConsoleKit). At present we have to choose between allowing full ptrace in policy (more permissive than required/desired) or breaking functionality (or in some cases just silencing the denials via dontaudit rules but this can hide genuine attacks). This version of the patch incorporates comments from Casey Schaufler (change/replace existing ptrace_may_attach interface, pass access mode), and Chris Wright (provide greater consistency in the checking). Note that like their predecessors __ptrace_may_attach and ptrace_may_attach, the __ptrace_may_access and ptrace_may_access interfaces use different return value conventions from each other (0 or -errno vs. 1 or 0). I retained this difference to avoid any changes to the caller logic but made the difference clearer by changing the latter interface to return a bool rather than an int and by adding a comment about it to ptrace.h for any future callers. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Acked-by: Chris Wright <chrisw@sous-sol.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 20:32:49 +08:00
* @mode contains the PTRACE_MODE flags indicating the form of access.
* Return 0 if permission is granted.
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
* @ptrace_traceme:
* Check that the @parent process has sufficient permission to trace the
* current process before allowing the current process to present itself
* to the @parent process for tracing.
* The parent process will still have to undergo the ptrace_access_check
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
* checks before it is allowed to trace this one.
* @parent contains the task_struct structure for debugger process.
* Return 0 if permission is granted.
* @capget:
* Get the @effective, @inheritable, and @permitted capability sets for
* the @target process. The hook may also perform permission checking to
* determine if the current process is allowed to see the capability sets
* of the @target process.
* @target contains the task_struct structure for target process.
* @effective contains the effective capability set.
* @inheritable contains the inheritable capability set.
* @permitted contains the permitted capability set.
* Return 0 if the capability sets were successfully obtained.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @capset:
* Set the @effective, @inheritable, and @permitted capability sets for
* the current process.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* @new contains the new credentials structure for target process.
* @old contains the current credentials structure for target process.
* @effective contains the effective capability set.
* @inheritable contains the inheritable capability set.
* @permitted contains the permitted capability set.
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
* Return 0 and update @new if permission is granted.
* @capable:
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
* Check whether the @tsk process has the @cap capability in the indicated
* credentials.
* @tsk contains the task_struct for the process.
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
* @cred contains the credentials to use.
* @cap contains the capability <include/linux/capability.h>.
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
* @audit: Whether to write an audit message or not
* Return 0 if the capability is granted for @tsk.
* @sysctl:
* Check permission before accessing the @table sysctl variable in the
* manner specified by @op.
* @table contains the ctl_table structure for the sysctl variable.
* @op contains the operation (001 = search, 002 = write, 004 = read).
* Return 0 if permission is granted.
* @syslog:
* Check permission before accessing the kernel message ring or changing
* logging to the console.
* See the syslog(2) manual page for an explanation of the @type values.
* @type contains the type of action.
* @from_file indicates the context of action (if it came from /proc).
* Return 0 if permission is granted.
* @settime:
* Check permission to change the system time.
* struct timespec and timezone are defined in include/linux/time.h
* @ts contains new time
* @tz contains new timezone
* Return 0 if permission is granted.
* @vm_enough_memory:
* Check permissions for allocating a new virtual mapping.
* @mm contains the mm struct it is being added to.
* @pages contains the number of pages.
* Return 0 if permission is granted.
*
* @secid_to_secctx:
* Convert secid to security context. If secdata is NULL the length of
* the result will be returned in seclen, but no secdata will be returned.
* This does mean that the length could change between calls to check the
* length and the next call which actually allocates and returns the secdata.
* @secid contains the security ID.
* @secdata contains the pointer that stores the converted security context.
* @seclen pointer which contains the length of the data
* @secctx_to_secid:
* Convert security context to secid.
* @secid contains the pointer to the generated security ID.
* @secdata contains the security context.
*
* @release_secctx:
* Release the security context.
* @secdata contains the security context.
* @seclen contains the length of the security context.
*
* Security hooks for Audit
*
* @audit_rule_init:
* Allocate and initialize an LSM audit rule structure.
* @field contains the required Audit action. Fields flags are defined in include/linux/audit.h
* @op contains the operator the rule uses.
* @rulestr contains the context where the rule will be applied to.
* @lsmrule contains a pointer to receive the result.
* Return 0 if @lsmrule has been successfully set,
* -EINVAL in case of an invalid rule.
*
* @audit_rule_known:
* Specifies whether given @rule contains any fields related to current LSM.
* @rule contains the audit rule of interest.
* Return 1 in case of relation found, 0 otherwise.
*
* @audit_rule_match:
* Determine if given @secid matches a rule previously approved
* by @audit_rule_known.
* @secid contains the security id in question.
* @field contains the field which relates to current LSM.
* @op contains the operator that will be used for matching.
* @rule points to the audit rule that will be checked against.
* @actx points to the audit context associated with the check.
* Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure.
*
* @audit_rule_free:
* Deallocate the LSM audit rule structure previously allocated by
* audit_rule_init.
* @rule contains the allocated rule
*
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
* @inode_notifysecctx:
* Notify the security module of what the security context of an inode
* should be. Initializes the incore security context managed by the
* security module for this inode. Example usage: NFS client invokes
* this hook to initialize the security context in its incore inode to the
* value provided by the server for the file when the server returned the
* file's attributes to the client.
*
* Must be called with inode->i_mutex locked.
*
* @inode we wish to set the security context of.
* @ctx contains the string which we wish to set in the inode.
* @ctxlen contains the length of @ctx.
*
* @inode_setsecctx:
* Change the security context of an inode. Updates the
* incore security context managed by the security module and invokes the
* fs code as needed (via __vfs_setxattr_noperm) to update any backing
* xattrs that represent the context. Example usage: NFS server invokes
* this hook to change the security context in its incore inode and on the
* backing filesystem to a value provided by the client on a SETATTR
* operation.
*
* Must be called with inode->i_mutex locked.
*
* @dentry contains the inode we wish to set the security context of.
* @ctx contains the string which we wish to set in the inode.
* @ctxlen contains the length of @ctx.
*
* @inode_getsecctx:
* Returns a string containing all relavent security context information
*
* @inode we wish to set the security context of.
* @ctx is a pointer in which to place the allocated security context.
* @ctxlen points to the place to put the length of @ctx.
* This is the main security structure.
*/
struct security_operations {
char name[SECURITY_NAME_MAX + 1];
int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
int (*ptrace_traceme) (struct task_struct *parent);
int (*capget) (struct task_struct *target,
kernel_cap_t *effective,
kernel_cap_t *inheritable, kernel_cap_t *permitted);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int (*capset) (struct cred *new,
const struct cred *old,
const kernel_cap_t *effective,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted);
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
int (*capable) (struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
int (*sysctl) (struct ctl_table *table, int op);
int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
int (*quota_on) (struct dentry *dentry);
int (*syslog) (int type);
int (*settime) (struct timespec *ts, struct timezone *tz);
int (*vm_enough_memory) (struct mm_struct *mm, long pages);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
int (*bprm_set_creds) (struct linux_binprm *bprm);
int (*bprm_check_security) (struct linux_binprm *bprm);
int (*bprm_secureexec) (struct linux_binprm *bprm);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
void (*bprm_committing_creds) (struct linux_binprm *bprm);
void (*bprm_committed_creds) (struct linux_binprm *bprm);
int (*sb_alloc_security) (struct super_block *sb);
void (*sb_free_security) (struct super_block *sb);
int (*sb_copy_data) (char *orig, char *copy);
int (*sb_kern_mount) (struct super_block *sb, int flags, void *data);
int (*sb_show_options) (struct seq_file *m, struct super_block *sb);
int (*sb_statfs) (struct dentry *dentry);
int (*sb_mount) (char *dev_name, struct path *path,
char *type, unsigned long flags, void *data);
int (*sb_umount) (struct vfsmount *mnt, int flags);
int (*sb_pivotroot) (struct path *old_path,
struct path *new_path);
int (*sb_set_mnt_opts) (struct super_block *sb,
struct security_mnt_opts *opts);
Security: add get, set, and cloning of superblock security information Adds security_get_sb_mnt_opts, security_set_sb_mnt_opts, and security_clont_sb_mnt_opts to the LSM and to SELinux. This will allow filesystems to directly own and control all of their mount options if they so choose. This interface deals only with option identifiers and strings so it should generic enough for any LSM which may come in the future. Filesystems which pass text mount data around in the kernel (almost all of them) need not currently make use of this interface when dealing with SELinux since it will still parse those strings as it always has. I assume future LSM's would do the same. NFS is the primary FS which does not use text mount data and thus must make use of this interface. An LSM would need to implement these functions only if they had mount time options, such as selinux has context= or fscontext=. If the LSM has no mount time options they could simply not implement and let the dummy ops take care of things. An LSM other than SELinux would need to define new option numbers in security.h and any FS which decides to own there own security options would need to be patched to use this new interface for every possible LSM. This is because it was stated to me very clearly that LSM's should not attempt to understand FS mount data and the burdon to understand security should be in the FS which owns the options. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2007-12-01 02:00:35 +08:00
void (*sb_clone_mnt_opts) (const struct super_block *oldsb,
struct super_block *newsb);
int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
#ifdef CONFIG_SECURITY_PATH
int (*path_unlink) (struct path *dir, struct dentry *dentry);
int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
int (*path_rmdir) (struct path *dir, struct dentry *dentry);
int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
unsigned int dev);
int (*path_truncate) (struct path *path);
int (*path_symlink) (struct path *dir, struct dentry *dentry,
const char *old_name);
int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
struct dentry *new_dentry);
int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
struct path *new_dir, struct dentry *new_dentry);
int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
mode_t mode);
int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
int (*path_chroot) (struct path *path);
#endif
int (*inode_alloc_security) (struct inode *inode);
void (*inode_free_security) (struct inode *inode);
[PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 04:01:35 +08:00
int (*inode_init_security) (struct inode *inode, struct inode *dir,
char **name, void **value, size_t *len);
int (*inode_create) (struct inode *dir,
struct dentry *dentry, int mode);
int (*inode_link) (struct dentry *old_dentry,
struct inode *dir, struct dentry *new_dentry);
int (*inode_unlink) (struct inode *dir, struct dentry *dentry);
int (*inode_symlink) (struct inode *dir,
struct dentry *dentry, const char *old_name);
int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode);
int (*inode_rmdir) (struct inode *dir, struct dentry *dentry);
int (*inode_mknod) (struct inode *dir, struct dentry *dentry,
int mode, dev_t dev);
int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int (*inode_readlink) (struct dentry *dentry);
int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
int (*inode_permission) (struct inode *inode, int mask);
int (*inode_setattr) (struct dentry *dentry, struct iattr *attr);
int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
int (*inode_setxattr) (struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int (*inode_getxattr) (struct dentry *dentry, const char *name);
int (*inode_listxattr) (struct dentry *dentry);
int (*inode_removexattr) (struct dentry *dentry, const char *name);
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
int (*inode_need_killpriv) (struct dentry *dentry);
int (*inode_killpriv) (struct dentry *dentry);
int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc);
int (*inode_setsecurity) (struct inode *inode, const char *name, const void *value, size_t size, int flags);
int (*inode_listsecurity) (struct inode *inode, char *buffer, size_t buffer_size);
void (*inode_getsecid) (const struct inode *inode, u32 *secid);
int (*file_permission) (struct file *file, int mask);
int (*file_alloc_security) (struct file *file);
void (*file_free_security) (struct file *file);
int (*file_ioctl) (struct file *file, unsigned int cmd,
unsigned long arg);
int (*file_mmap) (struct file *file,
unsigned long reqprot, unsigned long prot,
unsigned long flags, unsigned long addr,
unsigned long addr_only);
int (*file_mprotect) (struct vm_area_struct *vma,
unsigned long reqprot,
unsigned long prot);
int (*file_lock) (struct file *file, unsigned int cmd);
int (*file_fcntl) (struct file *file, unsigned int cmd,
unsigned long arg);
int (*file_set_fowner) (struct file *file);
int (*file_send_sigiotask) (struct task_struct *tsk,
struct fown_struct *fown, int sig);
int (*file_receive) (struct file *file);
int (*dentry_open) (struct file *file, const struct cred *cred);
int (*task_create) (unsigned long clone_flags);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
void (*cred_free) (struct cred *cred);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int (*cred_prepare)(struct cred *new, const struct cred *old,
gfp_t gfp);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
void (*cred_transfer)(struct cred *new, const struct cred *old);
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
int (*kernel_act_as)(struct cred *new, u32 secid);
int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
int (*kernel_module_request)(char *kmod_name);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int (*task_fix_setuid) (struct cred *new, const struct cred *old,
int flags);
int (*task_setpgid) (struct task_struct *p, pid_t pgid);
int (*task_getpgid) (struct task_struct *p);
int (*task_getsid) (struct task_struct *p);
void (*task_getsecid) (struct task_struct *p, u32 *secid);
int (*task_setnice) (struct task_struct *p, int nice);
int (*task_setioprio) (struct task_struct *p, int ioprio);
int (*task_getioprio) (struct task_struct *p);
int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim);
int (*task_setscheduler) (struct task_struct *p);
int (*task_getscheduler) (struct task_struct *p);
int (*task_movememory) (struct task_struct *p);
int (*task_kill) (struct task_struct *p,
struct siginfo *info, int sig, u32 secid);
int (*task_wait) (struct task_struct *p);
int (*task_prctl) (int option, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
unsigned long arg5);
void (*task_to_inode) (struct task_struct *p, struct inode *inode);
int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag);
void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid);
int (*msg_msg_alloc_security) (struct msg_msg *msg);
void (*msg_msg_free_security) (struct msg_msg *msg);
int (*msg_queue_alloc_security) (struct msg_queue *msq);
void (*msg_queue_free_security) (struct msg_queue *msq);
int (*msg_queue_associate) (struct msg_queue *msq, int msqflg);
int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd);
int (*msg_queue_msgsnd) (struct msg_queue *msq,
struct msg_msg *msg, int msqflg);
int (*msg_queue_msgrcv) (struct msg_queue *msq,
struct msg_msg *msg,
struct task_struct *target,
long type, int mode);
int (*shm_alloc_security) (struct shmid_kernel *shp);
void (*shm_free_security) (struct shmid_kernel *shp);
int (*shm_associate) (struct shmid_kernel *shp, int shmflg);
int (*shm_shmctl) (struct shmid_kernel *shp, int cmd);
int (*shm_shmat) (struct shmid_kernel *shp,
char __user *shmaddr, int shmflg);
int (*sem_alloc_security) (struct sem_array *sma);
void (*sem_free_security) (struct sem_array *sma);
int (*sem_associate) (struct sem_array *sma, int semflg);
int (*sem_semctl) (struct sem_array *sma, int cmd);
int (*sem_semop) (struct sem_array *sma,
struct sembuf *sops, unsigned nsops, int alter);
int (*netlink_send) (struct sock *sk, struct sk_buff *skb);
int (*netlink_recv) (struct sk_buff *skb, int cap);
void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
int (*getprocattr) (struct task_struct *p, char *name, char **value);
int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size);
int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen);
int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
void (*release_secctx) (char *secdata, u32 seclen);
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
#ifdef CONFIG_SECURITY_NETWORK
int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk);
int (*unix_may_send) (struct socket *sock, struct socket *other);
int (*socket_create) (int family, int type, int protocol, int kern);
int (*socket_post_create) (struct socket *sock, int family,
int type, int protocol, int kern);
int (*socket_bind) (struct socket *sock,
struct sockaddr *address, int addrlen);
int (*socket_connect) (struct socket *sock,
struct sockaddr *address, int addrlen);
int (*socket_listen) (struct socket *sock, int backlog);
int (*socket_accept) (struct socket *sock, struct socket *newsock);
int (*socket_sendmsg) (struct socket *sock,
struct msghdr *msg, int size);
int (*socket_recvmsg) (struct socket *sock,
struct msghdr *msg, int size, int flags);
int (*socket_getsockname) (struct socket *sock);
int (*socket_getpeername) (struct socket *sock);
int (*socket_getsockopt) (struct socket *sock, int level, int optname);
int (*socket_setsockopt) (struct socket *sock, int level, int optname);
int (*socket_shutdown) (struct socket *sock, int how);
int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
[SECURITY]: TCP/UDP getpeersec This patch implements an application of the LSM-IPSec networking controls whereby an application can determine the label of the security association its TCP or UDP sockets are currently connected to via getsockopt and the auxiliary data mechanism of recvmsg. Patch purpose: This patch enables a security-aware application to retrieve the security context of an IPSec security association a particular TCP or UDP socket is using. The application can then use this security context to determine the security context for processing on behalf of the peer at the other end of this connection. In the case of UDP, the security context is for each individual packet. An example application is the inetd daemon, which could be modified to start daemons running at security contexts dependent on the remote client. Patch design approach: - Design for TCP The patch enables the SELinux LSM to set the peer security context for a socket based on the security context of the IPSec security association. The application may retrieve this context using getsockopt. When called, the kernel determines if the socket is a connected (TCP_ESTABLISHED) TCP socket and, if so, uses the dst_entry cache on the socket to retrieve the security associations. If a security association has a security context, the context string is returned, as for UNIX domain sockets. - Design for UDP Unlike TCP, UDP is connectionless. This requires a somewhat different API to retrieve the peer security context. With TCP, the peer security context stays the same throughout the connection, thus it can be retrieved at any time between when the connection is established and when it is torn down. With UDP, each read/write can have different peer and thus the security context might change every time. As a result the security context retrieval must be done TOGETHER with the packet retrieval. The solution is to build upon the existing Unix domain socket API for retrieving user credentials. Linux offers the API for obtaining user credentials via ancillary messages (i.e., out of band/control messages that are bundled together with a normal message). Patch implementation details: - Implementation for TCP The security context can be retrieved by applications using getsockopt with the existing SO_PEERSEC flag. As an example (ignoring error checking): getsockopt(sockfd, SOL_SOCKET, SO_PEERSEC, optbuf, &optlen); printf("Socket peer context is: %s\n", optbuf); The SELinux function, selinux_socket_getpeersec, is extended to check for labeled security associations for connected (TCP_ESTABLISHED == sk->sk_state) TCP sockets only. If so, the socket has a dst_cache of struct dst_entry values that may refer to security associations. If these have security associations with security contexts, the security context is returned. getsockopt returns a buffer that contains a security context string or the buffer is unmodified. - Implementation for UDP To retrieve the security context, the application first indicates to the kernel such desire by setting the IP_PASSSEC option via getsockopt. Then the application retrieves the security context using the auxiliary data mechanism. An example server application for UDP should look like this: toggle = 1; toggle_len = sizeof(toggle); setsockopt(sockfd, SOL_IP, IP_PASSSEC, &toggle, &toggle_len); recvmsg(sockfd, &msg_hdr, 0); if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) { cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr); if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) && cmsg_hdr->cmsg_level == SOL_IP && cmsg_hdr->cmsg_type == SCM_SECURITY) { memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext)); } } ip_setsockopt is enhanced with a new socket option IP_PASSSEC to allow a server socket to receive security context of the peer. A new ancillary message type SCM_SECURITY. When the packet is received we get the security context from the sec_path pointer which is contained in the sk_buff, and copy it to the ancillary message space. An additional LSM hook, selinux_socket_getpeersec_udp, is defined to retrieve the security context from the SELinux space. The existing function, selinux_socket_getpeersec does not suit our purpose, because the security context is copied directly to user space, rather than to kernel space. Testing: We have tested the patch by setting up TCP and UDP connections between applications on two machines using the IPSec policies that result in labeled security associations being built. For TCP, we can then extract the peer security context using getsockopt on either end. For UDP, the receiving end can retrieve the security context using the auxiliary data mechanism of recvmsg. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-21 14:41:23 +08:00
int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
void (*sk_free_security) (struct sock *sk);
void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
void (*sk_getsecid) (struct sock *sk, u32 *secid);
void (*sock_graft) (struct sock *sk, struct socket *parent);
int (*inet_conn_request) (struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
int (*secmark_relabel_packet) (u32 secid);
void (*secmark_refcount_inc) (void);
void (*secmark_refcount_dec) (void);
void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
int (*tun_dev_create)(void);
void (*tun_dev_post_create)(struct sock *sk);
int (*tun_dev_attach)(struct sock *sk);
#endif /* CONFIG_SECURITY_NETWORK */
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#ifdef CONFIG_SECURITY_NETWORK_XFRM
int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp,
SELinux: Various xfrm labeling fixes Since the upstreaming of the mlsxfrm modification a few months back, testing has resulted in the identification of the following issues/bugs that are resolved in this patch set. 1. Fix the security context used in the IKE negotiation to be the context of the socket as opposed to the context of the SPD rule. 2. Fix SO_PEERSEC for tcp sockets to return the security context of the peer as opposed to the source. 3. Fix the selection of an SA for an outgoing packet to be at the same context as the originating socket/flow. The following would be the result of applying this patchset: - SO_PEERSEC will now correctly return the peer's context. - IKE deamons will receive the context of the source socket/flow as opposed to the SPD rule's context so that the negotiated SA will be at the same context as the source socket/flow. - The SELinux policy will require one or more of the following for a socket to be able to communicate with/without SAs: 1. To enable a socket to communicate without using labeled-IPSec SAs: allow socket_t unlabeled_t:association { sendto recvfrom } 2. To enable a socket to communicate with labeled-IPSec SAs: allow socket_t self:association { sendto }; allow socket_t peer_sa_t:association { recvfrom }; This Patch: Pass correct security context to IKE for use in negotiation Fix the security context passed to IKE for use in negotiation to be the context of the socket as opposed to the context of the SPD rule so that the SA carries the label of the originating socket/flow. Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com> Signed-off-by: James Morris <jmorris@namei.org>
2006-11-09 07:03:44 +08:00
struct xfrm_user_sec_ctx *sec_ctx);
int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
int (*xfrm_state_alloc_security) (struct xfrm_state *x,
SELinux: Various xfrm labeling fixes Since the upstreaming of the mlsxfrm modification a few months back, testing has resulted in the identification of the following issues/bugs that are resolved in this patch set. 1. Fix the security context used in the IKE negotiation to be the context of the socket as opposed to the context of the SPD rule. 2. Fix SO_PEERSEC for tcp sockets to return the security context of the peer as opposed to the source. 3. Fix the selection of an SA for an outgoing packet to be at the same context as the originating socket/flow. The following would be the result of applying this patchset: - SO_PEERSEC will now correctly return the peer's context. - IKE deamons will receive the context of the source socket/flow as opposed to the SPD rule's context so that the negotiated SA will be at the same context as the source socket/flow. - The SELinux policy will require one or more of the following for a socket to be able to communicate with/without SAs: 1. To enable a socket to communicate without using labeled-IPSec SAs: allow socket_t unlabeled_t:association { sendto recvfrom } 2. To enable a socket to communicate with labeled-IPSec SAs: allow socket_t self:association { sendto }; allow socket_t peer_sa_t:association { recvfrom }; This Patch: Pass correct security context to IKE for use in negotiation Fix the security context passed to IKE for use in negotiation to be the context of the socket as opposed to the context of the SPD rule so that the SA carries the label of the originating socket/flow. Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com> Signed-off-by: James Morris <jmorris@namei.org>
2006-11-09 07:03:44 +08:00
struct xfrm_user_sec_ctx *sec_ctx,
u32 secid);
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
void (*xfrm_state_free_security) (struct xfrm_state *x);
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
int (*xfrm_state_delete_security) (struct xfrm_state *x);
int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
int (*xfrm_state_pol_flow_match) (struct xfrm_state *x,
struct xfrm_policy *xp,
struct flowi *fl);
int (*xfrm_decode_session) (struct sk_buff *skb, u32 *secid, int ckall);
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#endif /* CONFIG_SECURITY_NETWORK_XFRM */
/* key management security hooks */
#ifdef CONFIG_KEYS
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags);
void (*key_free) (struct key *key);
int (*key_permission) (key_ref_t key_ref,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred,
key_perm_t perm);
int (*key_getsecurity)(struct key *key, char **_buffer);
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
int (*audit_rule_init) (u32 field, u32 op, char *rulestr, void **lsmrule);
int (*audit_rule_known) (struct audit_krule *krule);
int (*audit_rule_match) (u32 secid, u32 field, u32 op, void *lsmrule,
struct audit_context *actx);
void (*audit_rule_free) (void *lsmrule);
#endif /* CONFIG_AUDIT */
};
/* prototypes */
extern int security_init(void);
extern int security_module_enable(struct security_operations *ops);
extern int register_security(struct security_operations *ops);
/* Security operations */
int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
int security_ptrace_traceme(struct task_struct *parent);
int security_capget(struct task_struct *target,
kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int security_capset(struct cred *new, const struct cred *old,
const kernel_cap_t *effective,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted);
int security_capable(const struct cred *cred, int cap);
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
int security_real_capable(struct task_struct *tsk, int cap);
int security_real_capable_noaudit(struct task_struct *tsk, int cap);
int security_sysctl(struct ctl_table *table, int op);
int security_quotactl(int cmds, int type, int id, struct super_block *sb);
int security_quota_on(struct dentry *dentry);
int security_syslog(int type);
int security_settime(struct timespec *ts, struct timezone *tz);
int security_vm_enough_memory(long pages);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
int security_vm_enough_memory_kern(long pages);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
int security_bprm_set_creds(struct linux_binprm *bprm);
int security_bprm_check(struct linux_binprm *bprm);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
void security_bprm_committing_creds(struct linux_binprm *bprm);
void security_bprm_committed_creds(struct linux_binprm *bprm);
int security_bprm_secureexec(struct linux_binprm *bprm);
int security_sb_alloc(struct super_block *sb);
void security_sb_free(struct super_block *sb);
int security_sb_copy_data(char *orig, char *copy);
int security_sb_kern_mount(struct super_block *sb, int flags, void *data);
int security_sb_show_options(struct seq_file *m, struct super_block *sb);
int security_sb_statfs(struct dentry *dentry);
int security_sb_mount(char *dev_name, struct path *path,
char *type, unsigned long flags, void *data);
int security_sb_umount(struct vfsmount *mnt, int flags);
int security_sb_pivotroot(struct path *old_path, struct path *new_path);
int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts);
Security: add get, set, and cloning of superblock security information Adds security_get_sb_mnt_opts, security_set_sb_mnt_opts, and security_clont_sb_mnt_opts to the LSM and to SELinux. This will allow filesystems to directly own and control all of their mount options if they so choose. This interface deals only with option identifiers and strings so it should generic enough for any LSM which may come in the future. Filesystems which pass text mount data around in the kernel (almost all of them) need not currently make use of this interface when dealing with SELinux since it will still parse those strings as it always has. I assume future LSM's would do the same. NFS is the primary FS which does not use text mount data and thus must make use of this interface. An LSM would need to implement these functions only if they had mount time options, such as selinux has context= or fscontext=. If the LSM has no mount time options they could simply not implement and let the dummy ops take care of things. An LSM other than SELinux would need to define new option numbers in security.h and any FS which decides to own there own security options would need to be patched to use this new interface for every possible LSM. This is because it was stated to me very clearly that LSM's should not attempt to understand FS mount data and the burdon to understand security should be in the FS which owns the options. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2007-12-01 02:00:35 +08:00
void security_sb_clone_mnt_opts(const struct super_block *oldsb,
struct super_block *newsb);
int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts);
Security: add get, set, and cloning of superblock security information Adds security_get_sb_mnt_opts, security_set_sb_mnt_opts, and security_clont_sb_mnt_opts to the LSM and to SELinux. This will allow filesystems to directly own and control all of their mount options if they so choose. This interface deals only with option identifiers and strings so it should generic enough for any LSM which may come in the future. Filesystems which pass text mount data around in the kernel (almost all of them) need not currently make use of this interface when dealing with SELinux since it will still parse those strings as it always has. I assume future LSM's would do the same. NFS is the primary FS which does not use text mount data and thus must make use of this interface. An LSM would need to implement these functions only if they had mount time options, such as selinux has context= or fscontext=. If the LSM has no mount time options they could simply not implement and let the dummy ops take care of things. An LSM other than SELinux would need to define new option numbers in security.h and any FS which decides to own there own security options would need to be patched to use this new interface for every possible LSM. This is because it was stated to me very clearly that LSM's should not attempt to understand FS mount data and the burdon to understand security should be in the FS which owns the options. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Stephen D. Smalley <sds@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org>
2007-12-01 02:00:35 +08:00
int security_inode_alloc(struct inode *inode);
void security_inode_free(struct inode *inode);
int security_inode_init_security(struct inode *inode, struct inode *dir,
char **name, void **value, size_t *len);
int security_inode_create(struct inode *dir, struct dentry *dentry, int mode);
int security_inode_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry);
int security_inode_unlink(struct inode *dir, struct dentry *dentry);
int security_inode_symlink(struct inode *dir, struct dentry *dentry,
const char *old_name);
int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode);
int security_inode_rmdir(struct inode *dir, struct dentry *dentry);
int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
int security_inode_permission(struct inode *inode, int mask);
fs: rcu-walk for path lookup Perform common cases of path lookups without any stores or locking in the ancestor dentry elements. This is called rcu-walk, as opposed to the current algorithm which is a refcount based walk, or ref-walk. This results in far fewer atomic operations on every path element, significantly improving path lookup performance. It also avoids cacheline bouncing on common dentries, significantly improving scalability. The overall design is like this: * LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. * Take the RCU lock for the entire path walk, starting with the acquiring of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are not required for dentry persistence. * synchronize_rcu is called when unregistering a filesystem, so we can access d_ops and i_ops during rcu-walk. * Similarly take the vfsmount lock for the entire path walk. So now mnt refcounts are not required for persistence. Also we are free to perform mount lookups, and to assume dentry mount points and mount roots are stable up and down the path. * Have a per-dentry seqlock to protect the dentry name, parent, and inode, so we can load this tuple atomically, and also check whether any of its members have changed. * Dentry lookups (based on parent, candidate string tuple) recheck the parent sequence after the child is found in case anything changed in the parent during the path walk. * inode is also RCU protected so we can load d_inode and use the inode for limited things. * i_mode, i_uid, i_gid can be tested for exec permissions during path walk. * i_op can be loaded. When we reach the destination dentry, we lock it, recheck lookup sequence, and increment its refcount and mountpoint refcount. RCU and vfsmount locks are dropped. This is termed "dropping rcu-walk". If the dentry refcount does not match, we can not drop rcu-walk gracefully at the current point in the lokup, so instead return -ECHILD (for want of a better errno). This signals the path walking code to re-do the entire lookup with a ref-walk. Aside from the final dentry, there are other situations that may be encounted where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take a reference on the last good dentry) and continue with a ref-walk. Again, if we can drop rcu-walk gracefully, we return -ECHILD and do the whole lookup using ref-walk. But it is very important that we can continue with ref-walk for most cases, particularly to avoid the overhead of double lookups, and to gain the scalability advantages on common path elements (like cwd and root). The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs * dentries with d_revalidate * Following links In future patches, permission checks and d_revalidate become rcu-walk aware. It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
2011-01-07 14:49:52 +08:00
int security_inode_exec_permission(struct inode *inode, unsigned int flags);
int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
int security_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int security_inode_getxattr(struct dentry *dentry, const char *name);
int security_inode_listxattr(struct dentry *dentry);
int security_inode_removexattr(struct dentry *dentry, const char *name);
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
int security_inode_need_killpriv(struct dentry *dentry);
int security_inode_killpriv(struct dentry *dentry);
int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc);
int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
void security_inode_getsecid(const struct inode *inode, u32 *secid);
int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
void security_file_free(struct file *file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int security_file_mmap(struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags,
unsigned long addr, unsigned long addr_only);
int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
unsigned long prot);
int security_file_lock(struct file *file, unsigned int cmd);
int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
int security_file_set_fowner(struct file *file);
int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig);
int security_file_receive(struct file *file);
int security_dentry_open(struct file *file, const struct cred *cred);
int security_task_create(unsigned long clone_flags);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
void security_cred_free(struct cred *cred);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
void security_transfer_creds(struct cred *new, const struct cred *old);
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
int security_kernel_act_as(struct cred *new, u32 secid);
int security_kernel_create_files_as(struct cred *new, struct inode *inode);
int security_kernel_module_request(char *kmod_name);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int security_task_fix_setuid(struct cred *new, const struct cred *old,
int flags);
int security_task_setpgid(struct task_struct *p, pid_t pgid);
int security_task_getpgid(struct task_struct *p);
int security_task_getsid(struct task_struct *p);
void security_task_getsecid(struct task_struct *p, u32 *secid);
int security_task_setnice(struct task_struct *p, int nice);
int security_task_setioprio(struct task_struct *p, int ioprio);
int security_task_getioprio(struct task_struct *p);
int security_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim);
int security_task_setscheduler(struct task_struct *p);
int security_task_getscheduler(struct task_struct *p);
int security_task_movememory(struct task_struct *p);
int security_task_kill(struct task_struct *p, struct siginfo *info,
int sig, u32 secid);
int security_task_wait(struct task_struct *p);
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
unsigned long arg4, unsigned long arg5);
void security_task_to_inode(struct task_struct *p, struct inode *inode);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
int security_msg_msg_alloc(struct msg_msg *msg);
void security_msg_msg_free(struct msg_msg *msg);
int security_msg_queue_alloc(struct msg_queue *msq);
void security_msg_queue_free(struct msg_queue *msq);
int security_msg_queue_associate(struct msg_queue *msq, int msqflg);
int security_msg_queue_msgctl(struct msg_queue *msq, int cmd);
int security_msg_queue_msgsnd(struct msg_queue *msq,
struct msg_msg *msg, int msqflg);
int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
struct task_struct *target, long type, int mode);
int security_shm_alloc(struct shmid_kernel *shp);
void security_shm_free(struct shmid_kernel *shp);
int security_shm_associate(struct shmid_kernel *shp, int shmflg);
int security_shm_shmctl(struct shmid_kernel *shp, int cmd);
int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg);
int security_sem_alloc(struct sem_array *sma);
void security_sem_free(struct sem_array *sma);
int security_sem_associate(struct sem_array *sma, int semflg);
int security_sem_semctl(struct sem_array *sma, int cmd);
int security_sem_semop(struct sem_array *sma, struct sembuf *sops,
unsigned nsops, int alter);
void security_d_instantiate(struct dentry *dentry, struct inode *inode);
int security_getprocattr(struct task_struct *p, char *name, char **value);
int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_netlink_recv(struct sk_buff *skb, int cap);
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
void security_release_secctx(char *secdata, u32 seclen);
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
#else /* CONFIG_SECURITY */
struct security_mnt_opts {
};
static inline void security_init_mnt_opts(struct security_mnt_opts *opts)
{
}
static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
{
}
/*
* This is the default capabilities functionality. Most of these functions
* are just stubbed out, but a few must call the proper capable code.
*/
static inline int security_init(void)
{
return 0;
}
static inline int security_ptrace_access_check(struct task_struct *child,
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
unsigned int mode)
{
return cap_ptrace_access_check(child, mode);
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
}
static inline int security_ptrace_traceme(struct task_struct *parent)
{
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
return cap_ptrace_traceme(parent);
}
static inline int security_capget(struct task_struct *target,
kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
return cap_capget(target, effective, inheritable, permitted);
}
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static inline int security_capset(struct cred *new,
const struct cred *old,
const kernel_cap_t *effective,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted)
{
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
return cap_capset(new, old, effective, inheritable, permitted);
}
static inline int security_capable(const struct cred *cred, int cap)
{
return cap_capable(current, cred, cap, SECURITY_CAP_AUDIT);
}
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
static inline int security_real_capable(struct task_struct *tsk, int cap)
{
CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells <dhowells@redhat.com> Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-01-07 06:27:01 +08:00
int ret;
rcu_read_lock();
ret = cap_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT);
rcu_read_unlock();
return ret;
}
static inline
int security_real_capable_noaudit(struct task_struct *tsk, int cap)
{
int ret;
rcu_read_lock();
ret = cap_capable(tsk, __task_cred(tsk), cap,
SECURITY_CAP_NOAUDIT);
rcu_read_unlock();
return ret;
}
static inline int security_sysctl(struct ctl_table *table, int op)
{
return 0;
}
static inline int security_quotactl(int cmds, int type, int id,
struct super_block *sb)
{
return 0;
}
static inline int security_quota_on(struct dentry *dentry)
{
return 0;
}
static inline int security_syslog(int type)
{
return 0;
}
static inline int security_settime(struct timespec *ts, struct timezone *tz)
{
return cap_settime(ts, tz);
}
static inline int security_vm_enough_memory(long pages)
{
WARN_ON(current->mm == NULL);
return cap_vm_enough_memory(current->mm, pages);
}
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
WARN_ON(mm == NULL);
return cap_vm_enough_memory(mm, pages);
}
static inline int security_vm_enough_memory_kern(long pages)
{
/* If current->mm is a kernel thread then we will pass NULL,
for this specific case that is fine */
return cap_vm_enough_memory(current->mm, pages);
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static inline int security_bprm_set_creds(struct linux_binprm *bprm)
{
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
return cap_bprm_set_creds(bprm);
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static inline int security_bprm_check(struct linux_binprm *bprm)
{
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
return 0;
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static inline void security_bprm_committing_creds(struct linux_binprm *bprm)
{
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
{
}
static inline int security_bprm_secureexec(struct linux_binprm *bprm)
{
return cap_bprm_secureexec(bprm);
}
static inline int security_sb_alloc(struct super_block *sb)
{
return 0;
}
static inline void security_sb_free(struct super_block *sb)
{ }
static inline int security_sb_copy_data(char *orig, char *copy)
{
return 0;
}
static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
return 0;
}
static inline int security_sb_show_options(struct seq_file *m,
struct super_block *sb)
{
return 0;
}
static inline int security_sb_statfs(struct dentry *dentry)
{
return 0;
}
static inline int security_sb_mount(char *dev_name, struct path *path,
char *type, unsigned long flags,
void *data)
{
return 0;
}
static inline int security_sb_umount(struct vfsmount *mnt, int flags)
{
return 0;
}
static inline int security_sb_pivotroot(struct path *old_path,
struct path *new_path)
{
return 0;
}
static inline int security_sb_set_mnt_opts(struct super_block *sb,
struct security_mnt_opts *opts)
{
return 0;
}
static inline void security_sb_clone_mnt_opts(const struct super_block *oldsb,
struct super_block *newsb)
{ }
static inline int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
{
return 0;
}
static inline int security_inode_alloc(struct inode *inode)
{
return 0;
}
static inline void security_inode_free(struct inode *inode)
{ }
[PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 04:01:35 +08:00
static inline int security_inode_init_security(struct inode *inode,
[PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 04:01:35 +08:00
struct inode *dir,
char **name,
void **value,
size_t *len)
{
return -EOPNOTSUPP;
}
static inline int security_inode_create(struct inode *dir,
struct dentry *dentry,
int mode)
{
return 0;
}
static inline int security_inode_link(struct dentry *old_dentry,
struct inode *dir,
struct dentry *new_dentry)
{
return 0;
}
static inline int security_inode_unlink(struct inode *dir,
struct dentry *dentry)
{
return 0;
}
static inline int security_inode_symlink(struct inode *dir,
struct dentry *dentry,
const char *old_name)
{
return 0;
}
static inline int security_inode_mkdir(struct inode *dir,
struct dentry *dentry,
int mode)
{
return 0;
}
static inline int security_inode_rmdir(struct inode *dir,
struct dentry *dentry)
{
return 0;
}
static inline int security_inode_mknod(struct inode *dir,
struct dentry *dentry,
int mode, dev_t dev)
{
return 0;
}
static inline int security_inode_rename(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry)
{
return 0;
}
static inline int security_inode_readlink(struct dentry *dentry)
{
return 0;
}
static inline int security_inode_follow_link(struct dentry *dentry,
struct nameidata *nd)
{
return 0;
}
static inline int security_inode_permission(struct inode *inode, int mask)
{
return 0;
}
fs: rcu-walk for path lookup Perform common cases of path lookups without any stores or locking in the ancestor dentry elements. This is called rcu-walk, as opposed to the current algorithm which is a refcount based walk, or ref-walk. This results in far fewer atomic operations on every path element, significantly improving path lookup performance. It also avoids cacheline bouncing on common dentries, significantly improving scalability. The overall design is like this: * LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. * Take the RCU lock for the entire path walk, starting with the acquiring of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are not required for dentry persistence. * synchronize_rcu is called when unregistering a filesystem, so we can access d_ops and i_ops during rcu-walk. * Similarly take the vfsmount lock for the entire path walk. So now mnt refcounts are not required for persistence. Also we are free to perform mount lookups, and to assume dentry mount points and mount roots are stable up and down the path. * Have a per-dentry seqlock to protect the dentry name, parent, and inode, so we can load this tuple atomically, and also check whether any of its members have changed. * Dentry lookups (based on parent, candidate string tuple) recheck the parent sequence after the child is found in case anything changed in the parent during the path walk. * inode is also RCU protected so we can load d_inode and use the inode for limited things. * i_mode, i_uid, i_gid can be tested for exec permissions during path walk. * i_op can be loaded. When we reach the destination dentry, we lock it, recheck lookup sequence, and increment its refcount and mountpoint refcount. RCU and vfsmount locks are dropped. This is termed "dropping rcu-walk". If the dentry refcount does not match, we can not drop rcu-walk gracefully at the current point in the lokup, so instead return -ECHILD (for want of a better errno). This signals the path walking code to re-do the entire lookup with a ref-walk. Aside from the final dentry, there are other situations that may be encounted where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take a reference on the last good dentry) and continue with a ref-walk. Again, if we can drop rcu-walk gracefully, we return -ECHILD and do the whole lookup using ref-walk. But it is very important that we can continue with ref-walk for most cases, particularly to avoid the overhead of double lookups, and to gain the scalability advantages on common path elements (like cwd and root). The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs * dentries with d_revalidate * Following links In future patches, permission checks and d_revalidate become rcu-walk aware. It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
2011-01-07 14:49:52 +08:00
static inline int security_inode_exec_permission(struct inode *inode,
unsigned int flags)
{
return 0;
}
static inline int security_inode_setattr(struct dentry *dentry,
struct iattr *attr)
{
return 0;
}
static inline int security_inode_getattr(struct vfsmount *mnt,
struct dentry *dentry)
{
return 0;
}
static inline int security_inode_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{
return cap_inode_setxattr(dentry, name, value, size, flags);
}
static inline void security_inode_post_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{ }
static inline int security_inode_getxattr(struct dentry *dentry,
const char *name)
{
return 0;
}
static inline int security_inode_listxattr(struct dentry *dentry)
{
return 0;
}
static inline int security_inode_removexattr(struct dentry *dentry,
const char *name)
{
return cap_inode_removexattr(dentry, name);
}
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
static inline int security_inode_need_killpriv(struct dentry *dentry)
{
return cap_inode_need_killpriv(dentry);
}
static inline int security_inode_killpriv(struct dentry *dentry)
{
return cap_inode_killpriv(dentry);
}
static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
{
return -EOPNOTSUPP;
}
static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags)
{
return -EOPNOTSUPP;
}
static inline int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
{
return 0;
}
static inline void security_inode_getsecid(const struct inode *inode, u32 *secid)
{
*secid = 0;
}
static inline int security_file_permission(struct file *file, int mask)
{
return 0;
}
static inline int security_file_alloc(struct file *file)
{
return 0;
}
static inline void security_file_free(struct file *file)
{ }
static inline int security_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
return 0;
}
static inline int security_file_mmap(struct file *file, unsigned long reqprot,
unsigned long prot,
unsigned long flags,
unsigned long addr,
unsigned long addr_only)
{
return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
}
static inline int security_file_mprotect(struct vm_area_struct *vma,
unsigned long reqprot,
unsigned long prot)
{
return 0;
}
static inline int security_file_lock(struct file *file, unsigned int cmd)
{
return 0;
}
static inline int security_file_fcntl(struct file *file, unsigned int cmd,
unsigned long arg)
{
return 0;
}
static inline int security_file_set_fowner(struct file *file)
{
return 0;
}
static inline int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown,
int sig)
{
return 0;
}
static inline int security_file_receive(struct file *file)
{
return 0;
}
static inline int security_dentry_open(struct file *file,
const struct cred *cred)
{
return 0;
}
static inline int security_task_create(unsigned long clone_flags)
{
return 0;
}
static inline int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
{
return 0;
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static inline void security_cred_free(struct cred *cred)
{ }
static inline int security_prepare_creds(struct cred *new,
const struct cred *old,
gfp_t gfp)
{
return 0;
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
static inline void security_transfer_creds(struct cred *new,
const struct cred *old)
{
}
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
}
static inline int security_kernel_create_files_as(struct cred *cred,
struct inode *inode)
{
return 0;
}
static inline int security_kernel_module_request(char *kmod_name)
{
return 0;
}
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static inline int security_task_fix_setuid(struct cred *new,
const struct cred *old,
int flags)
{
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
return cap_task_fix_setuid(new, old, flags);
}
static inline int security_task_setpgid(struct task_struct *p, pid_t pgid)
{
return 0;
}
static inline int security_task_getpgid(struct task_struct *p)
{
return 0;
}
static inline int security_task_getsid(struct task_struct *p)
{
return 0;
}
static inline void security_task_getsecid(struct task_struct *p, u32 *secid)
{
*secid = 0;
}
static inline int security_task_setnice(struct task_struct *p, int nice)
{
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
return cap_task_setnice(p, nice);
}
static inline int security_task_setioprio(struct task_struct *p, int ioprio)
{
Implement file posix capabilities Implement file posix capabilities. This allows programs to be given a subset of root's powers regardless of who runs them, without having to use setuid and giving the binary all of root's powers. This version works with Kaigai Kohei's userspace tools, found at http://www.kaigai.gr.jp/index.php. For more information on how to use this patch, Chris Friedhoff has posted a nice page at http://www.friedhoff.org/fscaps.html. Changelog: Nov 27: Incorporate fixes from Andrew Morton (security-introduce-file-caps-tweaks and security-introduce-file-caps-warning-fix) Fix Kconfig dependency. Fix change signaling behavior when file caps are not compiled in. Nov 13: Integrate comments from Alexey: Remove CONFIG_ ifdef from capability.h, and use %zd for printing a size_t. Nov 13: Fix endianness warnings by sparse as suggested by Alexey Dobriyan. Nov 09: Address warnings of unused variables at cap_bprm_set_security when file capabilities are disabled, and simultaneously clean up the code a little, by pulling the new code into a helper function. Nov 08: For pointers to required userspace tools and how to use them, see http://www.friedhoff.org/fscaps.html. Nov 07: Fix the calculation of the highest bit checked in check_cap_sanity(). Nov 07: Allow file caps to be enabled without CONFIG_SECURITY, since capabilities are the default. Hook cap_task_setscheduler when !CONFIG_SECURITY. Move capable(TASK_KILL) to end of cap_task_kill to reduce audit messages. Nov 05: Add secondary calls in selinux/hooks.c to task_setioprio and task_setscheduler so that selinux and capabilities with file cap support can be stacked. Sep 05: As Seth Arnold points out, uid checks are out of place for capability code. Sep 01: Define task_setscheduler, task_setioprio, cap_task_kill, and task_setnice to make sure a user cannot affect a process in which they called a program with some fscaps. One remaining question is the note under task_setscheduler: are we ok with CAP_SYS_NICE being sufficient to confine a process to a cpuset? It is a semantic change, as without fsccaps, attach_task doesn't allow CAP_SYS_NICE to override the uid equivalence check. But since it uses security_task_setscheduler, which elsewhere is used where CAP_SYS_NICE can be used to override the uid equivalence check, fixing it might be tough. task_setscheduler note: this also controls cpuset:attach_task. Are we ok with CAP_SYS_NICE being used to confine to a cpuset? task_setioprio task_setnice sys_setpriority uses this (through set_one_prio) for another process. Need same checks as setrlimit Aug 21: Updated secureexec implementation to reflect the fact that euid and uid might be the same and nonzero, but the process might still have elevated caps. Aug 15: Handle endianness of xattrs. Enforce capability version match between kernel and disk. Enforce that no bits beyond the known max capability are set, else return -EPERM. With this extra processing, it may be worth reconsidering doing all the work at bprm_set_security rather than d_instantiate. Aug 10: Always call getxattr at bprm_set_security, rather than caching it at d_instantiate. [morgan@kernel.org: file-caps clean up for linux/capability.h] [bunk@kernel.org: unexport cap_inode_killpriv] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Andrew Morgan <morgan@kernel.org> Signed-off-by: Andrew Morgan <morgan@kernel.org> Signed-off-by: Adrian Bunk <bunk@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 14:31:36 +08:00
return cap_task_setioprio(p, ioprio);
}
static inline int security_task_getioprio(struct task_struct *p)
{
return 0;
}
static inline int security_task_setrlimit(struct task_struct *p,
unsigned int resource,
struct rlimit *new_rlim)
{
return 0;
}
static inline int security_task_setscheduler(struct task_struct *p)
{
return cap_task_setscheduler(p);
}
static inline int security_task_getscheduler(struct task_struct *p)
{
return 0;
}
static inline int security_task_movememory(struct task_struct *p)
{
return 0;
}
static inline int security_task_kill(struct task_struct *p,
struct siginfo *info, int sig,
u32 secid)
{
return 0;
}
static inline int security_task_wait(struct task_struct *p)
{
return 0;
}
static inline int security_task_prctl(int option, unsigned long arg2,
unsigned long arg3,
unsigned long arg4,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
unsigned long arg5)
{
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
return cap_task_prctl(option, arg2, arg3, arg3, arg5);
}
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ }
static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
short flag)
{
return 0;
}
static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
{
*secid = 0;
}
static inline int security_msg_msg_alloc(struct msg_msg *msg)
{
return 0;
}
static inline void security_msg_msg_free(struct msg_msg *msg)
{ }
static inline int security_msg_queue_alloc(struct msg_queue *msq)
{
return 0;
}
static inline void security_msg_queue_free(struct msg_queue *msq)
{ }
static inline int security_msg_queue_associate(struct msg_queue *msq,
int msqflg)
{
return 0;
}
static inline int security_msg_queue_msgctl(struct msg_queue *msq, int cmd)
{
return 0;
}
static inline int security_msg_queue_msgsnd(struct msg_queue *msq,
struct msg_msg *msg, int msqflg)
{
return 0;
}
static inline int security_msg_queue_msgrcv(struct msg_queue *msq,
struct msg_msg *msg,
struct task_struct *target,
long type, int mode)
{
return 0;
}
static inline int security_shm_alloc(struct shmid_kernel *shp)
{
return 0;
}
static inline void security_shm_free(struct shmid_kernel *shp)
{ }
static inline int security_shm_associate(struct shmid_kernel *shp,
int shmflg)
{
return 0;
}
static inline int security_shm_shmctl(struct shmid_kernel *shp, int cmd)
{
return 0;
}
static inline int security_shm_shmat(struct shmid_kernel *shp,
char __user *shmaddr, int shmflg)
{
return 0;
}
static inline int security_sem_alloc(struct sem_array *sma)
{
return 0;
}
static inline void security_sem_free(struct sem_array *sma)
{ }
static inline int security_sem_associate(struct sem_array *sma, int semflg)
{
return 0;
}
static inline int security_sem_semctl(struct sem_array *sma, int cmd)
{
return 0;
}
static inline int security_sem_semop(struct sem_array *sma,
struct sembuf *sops, unsigned nsops,
int alter)
{
return 0;
}
static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode)
{ }
static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
{
return -EINVAL;
}
static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
{
return -EINVAL;
}
static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
{
return cap_netlink_send(sk, skb);
}
static inline int security_netlink_recv(struct sk_buff *skb, int cap)
{
return cap_netlink_recv(skb, cap);
}
static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
{
return -EOPNOTSUPP;
}
static inline int security_secctx_to_secid(const char *secdata,
u32 seclen,
u32 *secid)
{
return -EOPNOTSUPP;
}
static inline void security_release_secctx(char *secdata, u32 seclen)
{
}
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
{
return -EOPNOTSUPP;
}
static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
return -EOPNOTSUPP;
}
static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_SECURITY */
#ifdef CONFIG_SECURITY_NETWORK
int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
int security_unix_may_send(struct socket *sock, struct socket *other);
int security_socket_create(int family, int type, int protocol, int kern);
int security_socket_post_create(struct socket *sock, int family,
int type, int protocol, int kern);
int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen);
int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen);
int security_socket_listen(struct socket *sock, int backlog);
int security_socket_accept(struct socket *sock, struct socket *newsock);
int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size);
int security_socket_recvmsg(struct socket *sock, struct msghdr *msg,
int size, int flags);
int security_socket_getsockname(struct socket *sock);
int security_socket_getpeername(struct socket *sock);
int security_socket_getsockopt(struct socket *sock, int level, int optname);
int security_socket_setsockopt(struct socket *sock, int level, int optname);
int security_socket_shutdown(struct socket *sock, int how);
int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
int __user *optlen, unsigned len);
int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
void security_sk_free(struct sock *sk);
void security_sk_clone(const struct sock *sk, struct sock *newsk);
void security_sk_classify_flow(struct sock *sk, struct flowi *fl);
void security_req_classify_flow(const struct request_sock *req, struct flowi *fl);
void security_sock_graft(struct sock*sk, struct socket *parent);
int security_inet_conn_request(struct sock *sk,
struct sk_buff *skb, struct request_sock *req);
void security_inet_csk_clone(struct sock *newsk,
const struct request_sock *req);
void security_inet_conn_established(struct sock *sk,
struct sk_buff *skb);
int security_secmark_relabel_packet(u32 secid);
void security_secmark_refcount_inc(void);
void security_secmark_refcount_dec(void);
int security_tun_dev_create(void);
void security_tun_dev_post_create(struct sock *sk);
int security_tun_dev_attach(struct sock *sk);
#else /* CONFIG_SECURITY_NETWORK */
static inline int security_unix_stream_connect(struct sock *sock,
struct sock *other,
struct sock *newsk)
{
return 0;
}
static inline int security_unix_may_send(struct socket *sock,
struct socket *other)
{
return 0;
}
static inline int security_socket_create(int family, int type,
int protocol, int kern)
{
return 0;
}
static inline int security_socket_post_create(struct socket *sock,
int family,
int type,
int protocol, int kern)
{
return 0;
}
static inline int security_socket_bind(struct socket *sock,
struct sockaddr *address,
int addrlen)
{
return 0;
}
static inline int security_socket_connect(struct socket *sock,
struct sockaddr *address,
int addrlen)
{
return 0;
}
static inline int security_socket_listen(struct socket *sock, int backlog)
{
return 0;
}
static inline int security_socket_accept(struct socket *sock,
struct socket *newsock)
{
return 0;
}
static inline int security_socket_sendmsg(struct socket *sock,
struct msghdr *msg, int size)
{
return 0;
}
static inline int security_socket_recvmsg(struct socket *sock,
struct msghdr *msg, int size,
int flags)
{
return 0;
}
static inline int security_socket_getsockname(struct socket *sock)
{
return 0;
}
static inline int security_socket_getpeername(struct socket *sock)
{
return 0;
}
static inline int security_socket_getsockopt(struct socket *sock,
int level, int optname)
{
return 0;
}
static inline int security_socket_setsockopt(struct socket *sock,
int level, int optname)
{
return 0;
}
static inline int security_socket_shutdown(struct socket *sock, int how)
{
return 0;
}
static inline int security_sock_rcv_skb(struct sock *sk,
struct sk_buff *skb)
{
return 0;
}
[SECURITY]: TCP/UDP getpeersec This patch implements an application of the LSM-IPSec networking controls whereby an application can determine the label of the security association its TCP or UDP sockets are currently connected to via getsockopt and the auxiliary data mechanism of recvmsg. Patch purpose: This patch enables a security-aware application to retrieve the security context of an IPSec security association a particular TCP or UDP socket is using. The application can then use this security context to determine the security context for processing on behalf of the peer at the other end of this connection. In the case of UDP, the security context is for each individual packet. An example application is the inetd daemon, which could be modified to start daemons running at security contexts dependent on the remote client. Patch design approach: - Design for TCP The patch enables the SELinux LSM to set the peer security context for a socket based on the security context of the IPSec security association. The application may retrieve this context using getsockopt. When called, the kernel determines if the socket is a connected (TCP_ESTABLISHED) TCP socket and, if so, uses the dst_entry cache on the socket to retrieve the security associations. If a security association has a security context, the context string is returned, as for UNIX domain sockets. - Design for UDP Unlike TCP, UDP is connectionless. This requires a somewhat different API to retrieve the peer security context. With TCP, the peer security context stays the same throughout the connection, thus it can be retrieved at any time between when the connection is established and when it is torn down. With UDP, each read/write can have different peer and thus the security context might change every time. As a result the security context retrieval must be done TOGETHER with the packet retrieval. The solution is to build upon the existing Unix domain socket API for retrieving user credentials. Linux offers the API for obtaining user credentials via ancillary messages (i.e., out of band/control messages that are bundled together with a normal message). Patch implementation details: - Implementation for TCP The security context can be retrieved by applications using getsockopt with the existing SO_PEERSEC flag. As an example (ignoring error checking): getsockopt(sockfd, SOL_SOCKET, SO_PEERSEC, optbuf, &optlen); printf("Socket peer context is: %s\n", optbuf); The SELinux function, selinux_socket_getpeersec, is extended to check for labeled security associations for connected (TCP_ESTABLISHED == sk->sk_state) TCP sockets only. If so, the socket has a dst_cache of struct dst_entry values that may refer to security associations. If these have security associations with security contexts, the security context is returned. getsockopt returns a buffer that contains a security context string or the buffer is unmodified. - Implementation for UDP To retrieve the security context, the application first indicates to the kernel such desire by setting the IP_PASSSEC option via getsockopt. Then the application retrieves the security context using the auxiliary data mechanism. An example server application for UDP should look like this: toggle = 1; toggle_len = sizeof(toggle); setsockopt(sockfd, SOL_IP, IP_PASSSEC, &toggle, &toggle_len); recvmsg(sockfd, &msg_hdr, 0); if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) { cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr); if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) && cmsg_hdr->cmsg_level == SOL_IP && cmsg_hdr->cmsg_type == SCM_SECURITY) { memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext)); } } ip_setsockopt is enhanced with a new socket option IP_PASSSEC to allow a server socket to receive security context of the peer. A new ancillary message type SCM_SECURITY. When the packet is received we get the security context from the sec_path pointer which is contained in the sk_buff, and copy it to the ancillary message space. An additional LSM hook, selinux_socket_getpeersec_udp, is defined to retrieve the security context from the SELinux space. The existing function, selinux_socket_getpeersec does not suit our purpose, because the security context is copied directly to user space, rather than to kernel space. Testing: We have tested the patch by setting up TCP and UDP connections between applications on two machines using the IPSec policies that result in labeled security associations being built. For TCP, we can then extract the peer security context using getsockopt on either end. For UDP, the receiving end can retrieve the security context using the auxiliary data mechanism of recvmsg. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-21 14:41:23 +08:00
static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
int __user *optlen, unsigned len)
{
return -ENOPROTOOPT;
}
static inline int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
{
return -ENOPROTOOPT;
}
static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
{
return 0;
}
static inline void security_sk_free(struct sock *sk)
{
}
static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
{
}
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
}
static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
{
}
static inline void security_sock_graft(struct sock *sk, struct socket *parent)
{
}
static inline int security_inet_conn_request(struct sock *sk,
struct sk_buff *skb, struct request_sock *req)
{
return 0;
}
static inline void security_inet_csk_clone(struct sock *newsk,
const struct request_sock *req)
{
}
static inline void security_inet_conn_established(struct sock *sk,
struct sk_buff *skb)
{
}
static inline int security_secmark_relabel_packet(u32 secid)
{
return 0;
}
static inline void security_secmark_refcount_inc(void)
{
}
static inline void security_secmark_refcount_dec(void)
{
}
static inline int security_tun_dev_create(void)
{
return 0;
}
static inline void security_tun_dev_post_create(struct sock *sk)
{
}
static inline int security_tun_dev_attach(struct sock *sk)
{
return 0;
}
#endif /* CONFIG_SECURITY_NETWORK */
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#ifdef CONFIG_SECURITY_NETWORK_XFRM
int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx);
int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp);
void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx);
int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx);
int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
struct xfrm_sec_ctx *polsec, u32 secid);
int security_xfrm_state_delete(struct xfrm_state *x);
void security_xfrm_state_free(struct xfrm_state *x);
int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
struct xfrm_policy *xp, struct flowi *fl);
int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid);
void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl);
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#else /* CONFIG_SECURITY_NETWORK_XFRM */
static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return 0;
}
static inline int security_xfrm_policy_clone(struct xfrm_sec_ctx *old, struct xfrm_sec_ctx **new_ctxp)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return 0;
}
static inline void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
}
static inline int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
{
return 0;
}
static inline int security_xfrm_state_alloc(struct xfrm_state *x,
struct xfrm_user_sec_ctx *sec_ctx)
{
return 0;
}
static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
struct xfrm_sec_ctx *polsec, u32 secid)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return 0;
}
static inline void security_xfrm_state_free(struct xfrm_state *x)
{
}
static inline int security_xfrm_state_delete(struct xfrm_state *x)
[LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 14:39:49 +08:00
{
return 0;
}
static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return 0;
}
static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
struct xfrm_policy *xp, struct flowi *fl)
{
return 1;
}
static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
{
return 0;
}
static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
{
}
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#endif /* CONFIG_SECURITY_NETWORK_XFRM */
#ifdef CONFIG_SECURITY_PATH
int security_path_unlink(struct path *dir, struct dentry *dentry);
int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
int security_path_rmdir(struct path *dir, struct dentry *dentry);
int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
unsigned int dev);
int security_path_truncate(struct path *path);
int security_path_symlink(struct path *dir, struct dentry *dentry,
const char *old_name);
int security_path_link(struct dentry *old_dentry, struct path *new_dir,
struct dentry *new_dentry);
int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
struct path *new_dir, struct dentry *new_dentry);
int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
mode_t mode);
int security_path_chown(struct path *path, uid_t uid, gid_t gid);
int security_path_chroot(struct path *path);
#else /* CONFIG_SECURITY_PATH */
static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
{
return 0;
}
static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
int mode)
{
return 0;
}
static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
{
return 0;
}
static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
int mode, unsigned int dev)
{
return 0;
}
static inline int security_path_truncate(struct path *path)
{
return 0;
}
static inline int security_path_symlink(struct path *dir, struct dentry *dentry,
const char *old_name)
{
return 0;
}
static inline int security_path_link(struct dentry *old_dentry,
struct path *new_dir,
struct dentry *new_dentry)
{
return 0;
}
static inline int security_path_rename(struct path *old_dir,
struct dentry *old_dentry,
struct path *new_dir,
struct dentry *new_dentry)
{
return 0;
}
static inline int security_path_chmod(struct dentry *dentry,
struct vfsmount *mnt,
mode_t mode)
{
return 0;
}
static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid)
{
return 0;
}
static inline int security_path_chroot(struct path *path)
{
return 0;
}
#endif /* CONFIG_SECURITY_PATH */
#ifdef CONFIG_KEYS
#ifdef CONFIG_SECURITY
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
void security_key_free(struct key *key);
int security_key_permission(key_ref_t key_ref,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred, key_perm_t perm);
int security_key_getsecurity(struct key *key, char **_buffer);
#else
static inline int security_key_alloc(struct key *key,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred,
unsigned long flags)
{
return 0;
}
static inline void security_key_free(struct key *key)
{
}
static inline int security_key_permission(key_ref_t key_ref,
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
const struct cred *cred,
key_perm_t perm)
{
return 0;
}
static inline int security_key_getsecurity(struct key *key, char **_buffer)
{
*_buffer = NULL;
return 0;
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
#endif
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
#ifdef CONFIG_SECURITY
int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule);
int security_audit_rule_known(struct audit_krule *krule);
int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule,
struct audit_context *actx);
void security_audit_rule_free(void *lsmrule);
#else
static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr,
void **lsmrule)
{
return 0;
}
static inline int security_audit_rule_known(struct audit_krule *krule)
{
return 0;
}
static inline int security_audit_rule_match(u32 secid, u32 field, u32 op,
void *lsmrule, struct audit_context *actx)
{
return 0;
}
static inline void security_audit_rule_free(void *lsmrule)
{ }
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_AUDIT */
#ifdef CONFIG_SECURITYFS
extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
extern void securityfs_remove(struct dentry *dentry);
#else /* CONFIG_SECURITYFS */
static inline struct dentry *securityfs_create_dir(const char *name,
struct dentry *parent)
{
return ERR_PTR(-ENODEV);
}
static inline struct dentry *securityfs_create_file(const char *name,
mode_t mode,
struct dentry *parent,
void *data,
const struct file_operations *fops)
{
return ERR_PTR(-ENODEV);
}
static inline void securityfs_remove(struct dentry *dentry)
{}
#endif
#ifdef CONFIG_SECURITY
static inline char *alloc_secdata(void)
{
return (char *)get_zeroed_page(GFP_KERNEL);
}
static inline void free_secdata(void *secdata)
{
free_page((unsigned long)secdata);
}
#else
static inline char *alloc_secdata(void)
{
return (char *)1;
}
static inline void free_secdata(void *secdata)
{ }
#endif /* CONFIG_SECURITY */
#endif /* ! __LINUX_SECURITY_H */