OpenCloudOS-Kernel/arch/um/kernel/trap.c

319 lines
7.6 KiB
C
Raw Normal View History

/*
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/module.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <arch.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <skas.h>
/*
* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
* segv().
*/
int handle_page_fault(unsigned long address, unsigned long ip,
int is_write, int is_user, int *code_out)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int err = -EFAULT;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(is_write ? FAULT_FLAG_WRITE : 0);
*code_out = SEGV_MAPERR;
/*
* If the fault was during atomic operation, don't take the fault, just
* fail.
*/
if (in_atomic())
goto out_nosemaphore;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
goto out;
else if (vma->vm_start <= address)
goto good_area;
else if (!(vma->vm_flags & VM_GROWSDOWN))
goto out;
else if (is_user && !ARCH_IS_STACKGROW(address))
goto out;
else if (expand_stack(vma, address))
goto out;
good_area:
*code_out = SEGV_ACCERR;
if (is_write && !(vma->vm_flags & VM_WRITE))
goto out;
/* Don't require VM_READ|VM_EXEC for write faults! */
if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out;
do {
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
int fault;
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
goto out_nosemaphore;
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM) {
goto out_of_memory;
} else if (fault & VM_FAULT_SIGBUS) {
err = -EACCES;
goto out;
}
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR)
current->maj_flt++;
else
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:05 +08:00
pgd = pgd_offset(mm, address);
pud = pud_offset(pgd, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
err = 0;
/*
* The below warning was added in place of
* pte_mkyoung(); if (is_write) pte_mkdirty();
* If it's triggered, we'd see normally a hang here (a clean pte is
* marked read-only to emulate the dirty bit).
* However, the generic code can mark a PTE writable but clean on a
* concurrent read fault, triggering this harmlessly. So comment it out.
*/
#if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
flush_tlb_page(vma, address);
out:
up_read(&mm->mmap_sem);
out_nosemaphore:
return err;
out_of_memory:
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
up_read(&mm->mmap_sem);
if (!is_user)
goto out_nosemaphore;
pagefault_out_of_memory();
return 0;
}
EXPORT_SYMBOL(handle_page_fault);
static void show_segv_info(struct uml_pt_regs *regs)
{
struct task_struct *tsk = current;
struct faultinfo *fi = UPT_FAULTINFO(regs);
if (!unhandled_signal(tsk, SIGSEGV))
return;
if (!printk_ratelimit())
return;
printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
(void *)UPT_IP(regs), (void *)UPT_SP(regs),
fi->error_code);
print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
printk(KERN_CONT "\n");
}
static void bad_segv(struct faultinfo fi, unsigned long ip)
{
struct siginfo si;
si.si_signo = SIGSEGV;
si.si_code = SEGV_ACCERR;
si.si_addr = (void __user *) FAULT_ADDRESS(fi);
current->thread.arch.faultinfo = fi;
force_sig_info(SIGSEGV, &si, current);
}
uml: kill processes instead of panicing kernel UML was panicing in the case of failures of libc calls which shouldn't happen. This is an overreaction since a failure from libc doesn't normally mean that kernel data structures are in an unknown state. Instead, the current process should just be killed if there is no way to recover. The case that prompted this was a failure of PTRACE_SETREGS restoring the same state that was read by PTRACE_GETREGS. It appears that when a process tries to load a bogus value into a segment register, it segfaults (as expected) and the value is actually loaded and is seen by PTRACE_GETREGS (not expected). This case is fixed by forcing a fatal SIGSEGV on the process so that it immediately dies. fatal_sigsegv was added for this purpose. It was declared as noreturn, so in order to pursuade gcc that it actually does not return, I added a call to os_dump_core (and declared it noreturn) so that I get a core file if somehow the process survives. All other calls in arch/um/os-Linux/skas/process.c got the same treatment, with failures causing the process to die instead of a kernel panic, with some exceptions. userspace_tramp exits with status 1 if anything goes wrong there. That will cause start_userspace to return an error. copy_context_skas0 and map_stub_pages also now return errors instead of panicing. Callers of thes functions were changed to check for errors and do something appropriate. Usually that's to return an error to their callers. check_skas3_ptrace_faultinfo just exits since that's too early to do anything else. save_registers, restore_registers, and init_registers now return status instead of panicing on failure, with their callers doing something appropriate. There were also duplicate declarations of save_registers and restore_registers in os.h - these are gone. I noticed and fixed up some whitespace damage. Signed-off-by: Jeff Dike <jdike@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 14:30:58 +08:00
void fatal_sigsegv(void)
{
force_sigsegv(SIGSEGV, current);
do_signal();
/*
* This is to tell gcc that we're not returning - do_signal
* can, in general, return, but in this case, it's not, since
* we just got a fatal SIGSEGV queued.
*/
os_dump_core();
}
void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
struct faultinfo * fi = UPT_FAULTINFO(regs);
if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
show_segv_info(regs);
bad_segv(*fi, UPT_IP(regs));
return;
}
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
}
[PATCH] uml: S390 preparation, abstract host page fault data This patch removes the arch-specific fault/trap-infos from thread and skas-regs. It adds a new struct faultinfo, that is arch-specific defined in sysdep/faultinfo.h. The structure is inserted in thread.arch and thread.regs.skas and thread.regs.tt Now, segv and other trap-handlers can copy the contents from regs.X.faultinfo to thread.arch.faultinfo with one simple assignment. Also, the number of macros necessary is reduced to FAULT_ADDRESS(struct faultinfo) extracts the faulting address from faultinfo FAULT_WRITE(struct faultinfo) extracts the "is_write" flag SEGV_IS_FIXABLE(struct faultinfo) is true for the fixable segvs, i.e. (TRAP == 14) on i386 UPT_FAULTINFO(regs) result is (struct faultinfo *) to the faultinfo in regs->skas.faultinfo GET_FAULTINFO_FROM_SC(struct faultinfo, struct sigcontext *) copies the relevant parts of the sigcontext to struct faultinfo. On SIGSEGV, call user_signal() instead of handle_segv(), if the architecture provides the information needed in PTRACE_FAULTINFO, or if PTRACE_FAULTINFO is missing, because segv-stub will provide the info. The benefit of the change is, that in case of a non-fixable SIGSEGV, we can give user processes a SIGSEGV, instead of possibly looping on pagefault handling. Since handle_segv() sikked arch_fixup() implicitly by passing ip==0 to segv(), I changed segv() to call arch_fixup() only, if !is_user. Signed-off-by: Bodo Stroesser <bstroesser@fujitsu-siemens.com> Signed-off-by: Jeff Dike <jdike@addtoit.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-06 07:15:31 +08:00
/*
* We give a *copy* of the faultinfo in the regs to segv.
* This must be done, since nesting SEGVs could overwrite
* the info in the regs. A pointer to the info then would
* give us bad data!
*/
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
struct uml_pt_regs *regs)
{
struct siginfo si;
jmp_buf *catcher;
int err;
int is_write = FAULT_WRITE(fi);
unsigned long address = FAULT_ADDRESS(fi);
if (!is_user && (address >= start_vm) && (address < end_vm)) {
flush_tlb_kernel_vm();
return 0;
}
else if (current->mm == NULL) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
}
if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
err = handle_page_fault(address, ip, is_write, is_user,
&si.si_code);
else {
err = -EFAULT;
/*
* A thread accessed NULL, we get a fault, but CR2 is invalid.
* This code is used in __do_copy_from_user() of TT mode.
* XXX tt mode is gone, so maybe this isn't needed any more
*/
address = 0;
}
catcher = current->thread.fault_catcher;
if (!err)
return 0;
else if (catcher != NULL) {
current->thread.fault_addr = (void *) address;
UML_LONGJMP(catcher, 1);
}
else if (current->thread.fault_addr != NULL)
panic("fault_addr set but no fault catcher");
else if (!is_user && arch_fixup(ip, regs))
return 0;
if (!is_user) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
address, ip);
}
show_segv_info(regs);
if (err == -EACCES) {
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_code = BUS_ADRERR;
si.si_addr = (void __user *)address;
current->thread.arch.faultinfo = fi;
force_sig_info(SIGBUS, &si, current);
} else {
BUG_ON(err != -EFAULT);
si.si_signo = SIGSEGV;
si.si_addr = (void __user *) address;
current->thread.arch.faultinfo = fi;
force_sig_info(SIGSEGV, &si, current);
}
return 0;
}
void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
{
struct faultinfo *fi;
struct siginfo clean_si;
if (!UPT_IS_USER(regs)) {
if (sig == SIGBUS)
printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
"mount likely just ran out of space\n");
panic("Kernel mode signal %d", sig);
}
arch_examine_signal(sig, regs);
memset(&clean_si, 0, sizeof(clean_si));
clean_si.si_signo = si->si_signo;
clean_si.si_errno = si->si_errno;
clean_si.si_code = si->si_code;
switch (sig) {
case SIGILL:
case SIGFPE:
case SIGSEGV:
case SIGBUS:
case SIGTRAP:
fi = UPT_FAULTINFO(regs);
clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
current->thread.arch.faultinfo = *fi;
#ifdef __ARCH_SI_TRAPNO
clean_si.si_trapno = si->si_trapno;
#endif
break;
default:
printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
sig, si->si_code);
}
force_sig_info(sig, &clean_si, current);
}
void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
{
if (current->thread.fault_catcher != NULL)
UML_LONGJMP(current->thread.fault_catcher, 1);
else
relay_signal(sig, si, regs);
}
void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
do_IRQ(WINCH_IRQ, regs);
}
void trap_init(void)
{
}