pid namespaces: define is_global_init() and is_container_init()
is_init() is an ambiguous name for the pid==1 check. Split it into is_global_init() and is_container_init(). A cgroup init has its tsk->pid == 1. A global init also has its tsk->pid == 1 and its active pid namespace is the init_pid_ns. But rather than check the active pid namespace, compare the task structure with 'init_pid_ns.child_reaper', which is initialized during boot to the /sbin/init process and never changes. Changelog: 2.6.22-rc4-mm2-pidns1: - Use 'init_pid_ns.child_reaper' to determine if a given task is the global init (/sbin/init) process. This would improve performance and remove dependence on the task_pid(). 2.6.21-mm2-pidns2: - [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc, ppc, avr32}/traps.c for the _exception() call to is_global_init(). This way, we kill only the cgroup if the cgroup's init has a bug rather than force a kernel panic. [akpm@linux-foundation.org: fix comment] [sukadev@us.ibm.com: Use is_global_init() in arch/m32r/mm/fault.c] [bunk@stusta.de: kernel/pid.c: remove unused exports] [sukadev@us.ibm.com: Fix capability.c to work with threaded init] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelianov <xemul@openvz.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Herbert Poetzel <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
3743ca05ff
commit
b460cbc581
|
@ -188,7 +188,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
|
|||
/* We ran out of memory, or some other thing happened to us that
|
||||
made us unable to handle the page fault gracefully. */
|
||||
out_of_memory:
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -197,7 +197,7 @@ survive:
|
|||
return fault;
|
||||
|
||||
out_of_memory:
|
||||
if (!is_init(tsk))
|
||||
if (!is_global_init(tsk))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
|
|
|
@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
|
|||
* generate the same exception over and over again and we get
|
||||
* nowhere. Better to kill it and let the kernel panic.
|
||||
*/
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
__sighandler_t handler;
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
|
|
|
@ -160,7 +160,7 @@ bad_area:
|
|||
if (exception_trace && printk_ratelimit())
|
||||
printk("%s%s[%d]: segfault at %08lx pc %08lx "
|
||||
"sp %08lx ecr %lu\n",
|
||||
is_init(tsk) ? KERN_EMERG : KERN_INFO,
|
||||
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
|
||||
tsk->comm, tsk->pid, address, regs->pc,
|
||||
regs->sp, ecr);
|
||||
_exception(SIGSEGV, regs, code, address);
|
||||
|
@ -209,7 +209,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
@ -231,7 +231,7 @@ do_sigbus:
|
|||
if (exception_trace)
|
||||
printk("%s%s[%d]: bus error at %08lx pc %08lx "
|
||||
"sp %08lx ecr %lu\n",
|
||||
is_init(tsk) ? KERN_EMERG : KERN_INFO,
|
||||
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
|
||||
tsk->comm, tsk->pid, address, regs->pc,
|
||||
regs->sp, ecr);
|
||||
|
||||
|
|
|
@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
|||
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -271,7 +271,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(tsk)) {
|
||||
if (is_global_init(tsk)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -180,7 +180,7 @@ good_area:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -173,7 +173,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(tsk)) {
|
||||
if (is_global_init(tsk)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
|
|||
* generate the same exception over and over again and we get
|
||||
* nowhere. Better to kill it and let the kernel panic.
|
||||
*/
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
__sighandler_t handler;
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
|
|
|
@ -375,7 +375,7 @@ bad_area_nosemaphore:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
|
|||
err->disposition == RTAS_DISP_NOT_RECOVERED &&
|
||||
err->target == RTAS_TARGET_MEMORY &&
|
||||
err->type == RTAS_TYPE_ECC_UNCORR &&
|
||||
!(current->pid == 0 || is_init(current))) {
|
||||
!(current->pid == 0 || is_global_init(current))) {
|
||||
/* Kill off a user process with an ECC error */
|
||||
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
|
||||
current->pid);
|
||||
|
|
|
@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
|
|||
* generate the same exception over and over again and we get
|
||||
* nowhere. Better to kill it and let the kernel panic.
|
||||
*/
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
__sighandler_t handler;
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
|
|
|
@ -290,7 +290,7 @@ bad_area:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -64,7 +64,7 @@ out:
|
|||
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
|
|||
struct mm_struct *mm = tsk->mm;
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(tsk)) {
|
||||
if (is_global_init(tsk)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
return 1;
|
||||
|
|
|
@ -207,7 +207,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -278,7 +278,7 @@ bad_area:
|
|||
show_regs(regs);
|
||||
#endif
|
||||
}
|
||||
if (is_init(tsk)) {
|
||||
if (is_global_init(tsk)) {
|
||||
panic("INIT had user mode bad_area\n");
|
||||
}
|
||||
tsk->thread.address = address;
|
||||
|
@ -320,14 +320,14 @@ no_context:
|
|||
* us unable to handle the page fault gracefully.
|
||||
*/
|
||||
out_of_memory:
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
panic("INIT out of memory\n");
|
||||
yield();
|
||||
goto survive;
|
||||
}
|
||||
printk("fault:Out of memory\n");
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -108,7 +108,7 @@ out_nosemaphore:
|
|||
* us unable to handle the page fault gracefully.
|
||||
*/
|
||||
out_of_memory:
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
up_read(&mm->mmap_sem);
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
|
|
|
@ -748,7 +748,7 @@ survive:
|
|||
retval = get_user_pages(current, current->mm,
|
||||
(unsigned long )to, 1, 1, 0, &pg, NULL);
|
||||
|
||||
if (retval == -ENOMEM && is_init(current)) {
|
||||
if (retval == -ENOMEM && is_global_init(current)) {
|
||||
up_read(&current->mm->mmap_sem);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
goto survive;
|
||||
|
|
|
@ -587,7 +587,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(tsk)) {
|
||||
if (is_global_init(tsk)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -554,7 +554,7 @@ no_context:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
goto again;
|
||||
}
|
||||
|
|
|
@ -145,7 +145,7 @@ bad_area:
|
|||
*/
|
||||
out_of_memory:
|
||||
up_read(&mm->mmap_sem);
|
||||
if (is_init(current)) {
|
||||
if (is_global_init(current)) {
|
||||
yield();
|
||||
down_read(&mm->mmap_sem);
|
||||
goto survive;
|
||||
|
|
|
@ -251,7 +251,7 @@ static void send_sig_all(int sig)
|
|||
struct task_struct *p;
|
||||
|
||||
for_each_process(p) {
|
||||
if (p->mm && !is_init(p))
|
||||
if (p->mm && !is_global_init(p))
|
||||
/* Not swapper, init nor kernel thread */
|
||||
force_sig(sig, p);
|
||||
}
|
||||
|
|
|
@ -1237,12 +1237,20 @@ static inline int pid_alive(struct task_struct *p)
|
|||
}
|
||||
|
||||
/**
|
||||
* is_init - check if a task structure is init
|
||||
* is_global_init - check if a task structure is init
|
||||
* @tsk: Task structure to be checked.
|
||||
*
|
||||
* Check if a task structure is the first user space task the kernel created.
|
||||
*
|
||||
* TODO: We should inline this function after some cleanups in pid_namespace.h
|
||||
*/
|
||||
static inline int is_init(struct task_struct *tsk)
|
||||
extern int is_global_init(struct task_struct *tsk);
|
||||
|
||||
/*
|
||||
* is_container_init:
|
||||
* check whether the task is init in its own pid namespace.
|
||||
*/
|
||||
static inline int is_container_init(struct task_struct *tsk)
|
||||
{
|
||||
return tsk->pid == 1;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
|
@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
|
|||
int found = 0;
|
||||
|
||||
do_each_thread(g, target) {
|
||||
if (target == current || is_init(target))
|
||||
if (target == current || is_container_init(target->group_leader))
|
||||
continue;
|
||||
found = 1;
|
||||
if (security_capset_check(target, effective, inheritable,
|
||||
|
|
|
@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
|
|||
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
|
||||
if (p == ignored_task
|
||||
|| p->exit_state
|
||||
|| is_init(p->real_parent))
|
||||
|| is_global_init(p->real_parent))
|
||||
continue;
|
||||
if (task_pgrp(p->real_parent) != pgrp &&
|
||||
task_session(p->real_parent) == task_session(p)) {
|
||||
|
|
|
@ -51,7 +51,7 @@ struct resource crashk_res = {
|
|||
|
||||
int kexec_should_crash(struct task_struct *p)
|
||||
{
|
||||
if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
|
||||
if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -70,6 +70,11 @@ struct pid_namespace init_pid_ns = {
|
|||
.child_reaper = &init_task
|
||||
};
|
||||
|
||||
int is_global_init(struct task_struct *tsk)
|
||||
{
|
||||
return tsk == init_pid_ns.child_reaper;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: disable interrupts while the pidmap_lock is held as an
|
||||
* interrupt might come in and do read_lock(&tasklist_lock).
|
||||
|
|
|
@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)
|
|||
|
||||
int unhandled_signal(struct task_struct *tsk, int sig)
|
||||
{
|
||||
if (is_init(tsk))
|
||||
if (is_global_init(tsk))
|
||||
return 1;
|
||||
if (tsk->ptrace & PT_PTRACED)
|
||||
return 0;
|
||||
|
|
|
@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
|
|||
return -EPERM;
|
||||
}
|
||||
|
||||
op = is_init(current) ? OP_SET : OP_AND;
|
||||
op = is_global_init(current) ? OP_SET : OP_AND;
|
||||
return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
|
||||
do_proc_dointvec_bset_conv,&op);
|
||||
}
|
||||
|
|
|
@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
|
|||
if (!p->mm)
|
||||
continue;
|
||||
/* skip the init task */
|
||||
if (is_init(p))
|
||||
if (is_global_init(p))
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
|
|||
*/
|
||||
static void __oom_kill_task(struct task_struct *p, int verbose)
|
||||
{
|
||||
if (is_init(p)) {
|
||||
if (is_global_init(p)) {
|
||||
WARN_ON(1);
|
||||
printk(KERN_WARNING "tried to kill init!\n");
|
||||
return;
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <linux/xattr.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
|
||||
/*
|
||||
|
@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
|
|||
/* For init, we want to retain the capabilities set
|
||||
* in the init_task struct. Thus we skip the usual
|
||||
* capability rules */
|
||||
if (!is_init(current)) {
|
||||
if (!is_global_init(current)) {
|
||||
current->cap_permitted = new_permitted;
|
||||
current->cap_effective = bprm->cap_effective ?
|
||||
new_permitted : 0;
|
||||
|
|
Loading…
Reference in New Issue