OpenCloudOS-Kernel/arch/mips/mm/fault.c

253 lines
6.3 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1995 - 2000 by Ralf Baechle
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/module.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/highmem.h> /* For VMALLOC_END */
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
unsigned long address)
{
struct vm_area_struct * vma = NULL;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
const int field = sizeof(unsigned long) * 2;
siginfo_t info;
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
int fault;
#if 0
printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
current->comm, current->pid, field, address, write,
field, regs->cp0_epc);
#endif
info.si_code = SEGV_MAPERR;
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto vmalloc_fault;
[MIPS] Load modules to CKSEG0 if CONFIG_BUILD_ELF64=n This is a patch to load 64-bit modules to CKSEG0 so that can be compiled with -msym32 option. This makes each module ~10% smaller. * introduce MODULE_START and MODULE_END * custom module_alloc() * PGD for modules * change XTLB refill handler synthesizer * enable -msym32 for modules again (revert ca78b1a5c6a6e70e052d3ea253828e49b5d07c8a) New XTLB refill handler looks like this: 80000080 dmfc0 k0,C0_BADVADDR 80000084 bltz k0,800000e4 # goto l_module_alloc 80000088 lui k1,0x8046 # %high(pgd_current) 8000008c ld k1,24600(k1) # %low(pgd_current) 80000090 dsrl k0,k0,0x1b # l_vmalloc_done: 80000094 andi k0,k0,0x1ff8 80000098 daddu k1,k1,k0 8000009c dmfc0 k0,C0_BADVADDR 800000a0 ld k1,0(k1) 800000a4 dsrl k0,k0,0x12 800000a8 andi k0,k0,0xff8 800000ac daddu k1,k1,k0 800000b0 dmfc0 k0,C0_XCONTEXT 800000b4 ld k1,0(k1) 800000b8 andi k0,k0,0xff0 800000bc daddu k1,k1,k0 800000c0 ld k0,0(k1) 800000c4 ld k1,8(k1) 800000c8 dsrl k0,k0,0x6 800000cc mtc0 k0,C0_ENTRYLO0 800000d0 dsrl k1,k1,0x6 800000d4 mtc0 k1,C0_ENTRYL01 800000d8 nop 800000dc tlbwr 800000e0 eret 800000e4 dsll k1,k0,0x2 # l_module_alloc: 800000e8 bgez k1,80000008 # goto l_vmalloc 800000ec lui k1,0xc000 800000f0 dsubu k0,k0,k1 800000f4 lui k1,0x8046 # %high(module_pg_dir) 800000f8 beq zero,zero,80000000 800000fc nop 80000000 beq zero,zero,80000090 # goto l_vmalloc_done 80000004 daddiu k1,k1,0x4000 80000008 dsll32 k1,k1,0x0 # l_vmalloc: 8000000c dsubu k0,k0,k1 80000010 beq zero,zero,80000090 # goto l_vmalloc_done 80000014 lui k1,0x8046 # %high(swapper_pg_dir) Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-10-25 23:08:31 +08:00
#ifdef MODULE_START
if (unlikely(address >= MODULE_START && address < MODULE_END))
goto vmalloc_fault;
#endif
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, address))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
info.si_code = SEGV_ACCERR;
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
goto bad_area;
}
survive:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
fault = handle_mm_fault(mm, vma, address, write);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
up_read(&mm->mmap_sem);
return;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
up_read(&mm->mmap_sem);
bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
tsk->thread.cp0_badvaddr = address;
tsk->thread.error_code = write;
#if 0
printk("do_page_fault() #2: sending SIGSEGV to %s for "
"invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
tsk->comm,
write ? "write access to" : "read access from",
field, address,
field, (unsigned long) regs->cp0_epc,
field, (unsigned long) regs->regs[31]);
#endif
info.si_signo = SIGSEGV;
info.si_errno = 0;
/* info.si_code has been set above */
info.si_addr = (void __user *) address;
force_sig_info(SIGSEGV, &info, tsk);
return;
}
no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs)) {
current->thread.cp0_baduaddr = address;
return;
}
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
bust_spinlocks(1);
printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
"virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
raw_smp_processor_id(), field, address, field, regs->cp0_epc,
field, regs->regs[31]);
die("Oops", regs);
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
out_of_memory:
up_read(&mm->mmap_sem);
pid namespaces: define is_global_init() and is_container_init() is_init() is an ambiguous name for the pid==1 check. Split it into is_global_init() and is_container_init(). A cgroup init has it's tsk->pid == 1. A global init also has it's tsk->pid == 1 and it's active pid namespace is the init_pid_ns. But rather than check the active pid namespace, compare the task structure with 'init_pid_ns.child_reaper', which is initialized during boot to the /sbin/init process and never changes. Changelog: 2.6.22-rc4-mm2-pidns1: - Use 'init_pid_ns.child_reaper' to determine if a given task is the global init (/sbin/init) process. This would improve performance and remove dependence on the task_pid(). 2.6.21-mm2-pidns2: - [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc, ppc,avr32}/traps.c for the _exception() call to is_global_init(). This way, we kill only the cgroup if the cgroup's init has a bug rather than force a kernel panic. [akpm@linux-foundation.org: fix comment] [sukadev@us.ibm.com: Use is_global_init() in arch/m32r/mm/fault.c] [bunk@stusta.de: kernel/pid.c: remove unused exports] [sukadev@us.ibm.com: Fix capability.c to work with threaded init] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelianov <xemul@openvz.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Herbert Poetzel <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 14:39:52 +08:00
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", tsk->comm);
if (user_mode(regs))
During VM oom condition, kill all threads in process group We have had complaints where a threaded application is left in a bad state after one of it's threads is killed when we hit a VM: out_of_memory condition. Killing just one of the process threads can leave the application in a bad state, whereas killing the entire process group would allow for the application to restart, or be otherwise handled, and makes it very obvious that something has gone wrong. This change allows the entire process group to be taken down, rather than just the one thread. Signed-off-by: Will Schmidt <will_schmidt@vnet.ibm.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Haavard Skinnemoen <hskinnemoen@atmel.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Andi Kleen <ak@suse.de> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Zankel <chris@zankel.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-16 16:24:18 +08:00
do_group_exit(SIGKILL);
goto no_context;
do_sigbus:
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
else
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
#if 0
printk("do_page_fault() #3: sending SIGBUS to %s for "
"invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
tsk->comm,
write ? "write access to" : "read access from",
field, address,
field, (unsigned long) regs->cp0_epc,
field, (unsigned long) regs->regs[31]);
#endif
tsk->thread.cp0_badvaddr = address;
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRERR;
info.si_addr = (void __user *) address;
force_sig_info(SIGBUS, &info, tsk);
return;
vmalloc_fault:
{
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
*
* Do _not_ use "tsk" here. We might be inside
* an interrupt in the middle of a task switch..
*/
int offset = __pgd_offset(address);
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset;
pgd_k = init_mm.pgd + offset;
if (!pgd_present(*pgd_k))
goto no_context;
set_pgd(pgd, *pgd_k);
pud = pud_offset(pgd, address);
pud_k = pud_offset(pgd_k, address);
if (!pud_present(*pud_k))
goto no_context;
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
goto no_context;
set_pmd(pmd, *pmd_k);
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
goto no_context;
return;
}
}