2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* fs/proc/kcore.c kernel ELF core dumper
|
|
|
|
*
|
|
|
|
* Modelled on fs/exec.c:aout_core_dump()
|
|
|
|
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
|
|
|
|
* ELF version written by David Howells <David.Howells@nexor.co.uk>
|
|
|
|
* Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
|
|
|
|
* Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
|
|
|
|
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/proc_fs.h>
|
2013-04-12 07:10:25 +08:00
|
|
|
#include <linux/kcore.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/user.h>
|
2006-01-12 04:17:46 +08:00
|
|
|
#include <linux/capability.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/elf.h>
|
|
|
|
#include <linux/elfcore.h>
|
2013-04-30 06:08:08 +08:00
|
|
|
#include <linux/notifier.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/highmem.h>
|
2013-02-28 09:03:16 +08:00
|
|
|
#include <linux/printk.h>
|
2009-09-23 07:45:48 +08:00
|
|
|
#include <linux/bootmem.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2016-12-25 03:46:01 +08:00
|
|
|
#include <linux/uaccess.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/io.h>
|
2009-09-23 07:45:41 +08:00
|
|
|
#include <linux/list.h>
|
2009-09-23 07:45:48 +08:00
|
|
|
#include <linux/ioport.h>
|
|
|
|
#include <linux/memory.h>
|
2009-09-23 07:45:45 +08:00
|
|
|
#include <asm/sections.h>
|
2013-04-11 20:34:43 +08:00
|
|
|
#include "internal.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-12-07 12:38:00 +08:00
|
|
|
/* Name stored in each of the ELF notes written into the /proc/kcore header. */
#define CORE_STR "CORE"
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-07-25 16:48:10 +08:00
|
|
|
/*
 * Value placed in the ELF header's e_flags field.  Use 0 unless a definition
 * of ELF_CORE_EFLAGS is already visible at this point.
 */
#if !defined(ELF_CORE_EFLAGS)
# define ELF_CORE_EFLAGS 0
#endif
|
|
|
|
|
2008-10-06 18:14:19 +08:00
|
|
|
/* The /proc/kcore entry created by proc_kcore_init(); its ->size tracks the current file size. */
static struct proc_dir_entry *proc_root_kcore;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * Default conversions between a kernel virtual address and its offset inside
 * the memory part of /proc/kcore: the offset is the distance from PAGE_OFFSET.
 * A definition that is already visible at this point takes precedence.
 */
#if !defined(kc_vaddr_to_offset)
# define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif

#if !defined(kc_offset_to_vaddr)
# define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
|
|
|
|
|
|
|
|
/* In-memory description of one ELF note, as consumed by notesize() and storenote(). */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, copied into n_type */
	unsigned int datasz;	/* number of payload bytes at @data */
	void *data;		/* note payload */
};
|
|
|
|
|
2009-09-23 07:45:41 +08:00
|
|
|
/* List of every region (struct kcore_list) exported through /proc/kcore. */
static LIST_HEAD(kclist_head);
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Guards kclist_head; also held for writing whenever kcore_need_update is changed. */
static DEFINE_RWLOCK(kclist_lock);
|
2009-09-23 07:45:48 +08:00
|
|
|
/*
 * Non-zero while the RAM entries of the list are stale: starts at 1, is set
 * again by the memory hotplug notifier and cleared by __kcore_update_ram().
 */
static int kcore_need_update = 1;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
void
|
2009-09-23 07:45:43 +08:00
|
|
|
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
new->addr = (unsigned long)addr;
|
|
|
|
new->size = size;
|
2009-09-23 07:45:43 +08:00
|
|
|
new->type = type;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
write_lock(&kclist_lock);
|
2009-09-23 07:45:41 +08:00
|
|
|
list_add_tail(&new->list, &kclist_head);
|
2005-04-17 06:20:36 +08:00
|
|
|
write_unlock(&kclist_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
|
|
|
|
{
|
|
|
|
size_t try, size;
|
|
|
|
struct kcore_list *m;
|
|
|
|
|
|
|
|
*nphdr = 1; /* PT_NOTE */
|
|
|
|
size = 0;
|
|
|
|
|
2009-09-23 07:45:41 +08:00
|
|
|
list_for_each_entry(m, &kclist_head, list) {
|
2005-04-17 06:20:36 +08:00
|
|
|
try = kc_vaddr_to_offset((size_t)m->addr + m->size);
|
|
|
|
if (try > size)
|
|
|
|
size = try;
|
|
|
|
*nphdr = *nphdr + 1;
|
|
|
|
}
|
|
|
|
*elf_buflen = sizeof(struct elfhdr) +
|
|
|
|
(*nphdr + 2)*sizeof(struct elf_phdr) +
|
2006-12-07 12:38:00 +08:00
|
|
|
3 * ((sizeof(struct elf_note)) +
|
|
|
|
roundup(sizeof(CORE_STR), 4)) +
|
|
|
|
roundup(sizeof(struct elf_prstatus), 4) +
|
|
|
|
roundup(sizeof(struct elf_prpsinfo), 4) +
|
2015-07-17 18:28:12 +08:00
|
|
|
roundup(arch_task_struct_size, 4);
|
2005-04-17 06:20:36 +08:00
|
|
|
*elf_buflen = PAGE_ALIGN(*elf_buflen);
|
|
|
|
return size + *elf_buflen;
|
|
|
|
}
|
|
|
|
|
2009-09-23 07:45:48 +08:00
|
|
|
static void free_kclist_ents(struct list_head *head)
|
|
|
|
{
|
|
|
|
struct kcore_list *tmp, *pos;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pos, tmp, head, list) {
|
|
|
|
list_del(&pos->list);
|
|
|
|
kfree(pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
2009-09-23 07:45:49 +08:00
|
|
|
* Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
|
2009-09-23 07:45:48 +08:00
|
|
|
*/
|
|
|
|
static void __kcore_update_ram(struct list_head *list)
|
|
|
|
{
|
2009-09-23 07:45:51 +08:00
|
|
|
int nphdr;
|
|
|
|
size_t size;
|
2009-09-23 07:45:48 +08:00
|
|
|
struct kcore_list *tmp, *pos;
|
|
|
|
LIST_HEAD(garbage);
|
|
|
|
|
|
|
|
write_lock(&kclist_lock);
|
|
|
|
if (kcore_need_update) {
|
|
|
|
list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
|
2009-09-23 07:45:49 +08:00
|
|
|
if (pos->type == KCORE_RAM
|
|
|
|
|| pos->type == KCORE_VMEMMAP)
|
2009-09-23 07:45:48 +08:00
|
|
|
list_move(&pos->list, &garbage);
|
|
|
|
}
|
|
|
|
list_splice_tail(list, &kclist_head);
|
|
|
|
} else
|
|
|
|
list_splice(list, &garbage);
|
|
|
|
kcore_need_update = 0;
|
2009-09-23 07:45:51 +08:00
|
|
|
proc_root_kcore->size = get_kcore_size(&nphdr, &size);
|
2009-09-23 07:45:48 +08:00
|
|
|
write_unlock(&kclist_lock);
|
|
|
|
|
|
|
|
free_kclist_ents(&garbage);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_HIGHMEM
|
|
|
|
/*
|
|
|
|
* If no highmem, we can assume [0...max_low_pfn) continuous range of memory
|
|
|
|
* because memory hole is not as big as !HIGHMEM case.
|
|
|
|
* (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
|
|
|
|
*/
|
|
|
|
static int kcore_update_ram(void)
|
|
|
|
{
|
|
|
|
LIST_HEAD(head);
|
|
|
|
struct kcore_list *ent;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
|
|
|
|
if (!ent)
|
|
|
|
return -ENOMEM;
|
|
|
|
ent->addr = (unsigned long)__va(0);
|
|
|
|
ent->size = max_low_pfn << PAGE_SHIFT;
|
|
|
|
ent->type = KCORE_RAM;
|
|
|
|
list_add(&ent->list, &head);
|
|
|
|
__kcore_update_ram(&head);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* !CONFIG_HIGHMEM */
|
|
|
|
|
2009-09-23 07:45:49 +08:00
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
|
|
/* calculate vmemmap's address from given system ram pfn and register it */
|
2012-03-24 06:02:52 +08:00
|
|
|
static int
|
|
|
|
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
|
2009-09-23 07:45:49 +08:00
|
|
|
{
|
|
|
|
unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
|
|
|
|
unsigned long nr_pages = ent->size >> PAGE_SHIFT;
|
|
|
|
unsigned long start, end;
|
|
|
|
struct kcore_list *vmm, *tmp;
|
|
|
|
|
|
|
|
|
|
|
|
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
|
|
|
|
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
|
2014-08-09 05:21:20 +08:00
|
|
|
end = PAGE_ALIGN(end);
|
2009-09-23 07:45:49 +08:00
|
|
|
/* overlap check (because we have to align page */
|
|
|
|
list_for_each_entry(tmp, head, list) {
|
|
|
|
if (tmp->type != KCORE_VMEMMAP)
|
|
|
|
continue;
|
|
|
|
if (start < tmp->addr + tmp->size)
|
|
|
|
if (end > tmp->addr)
|
|
|
|
end = tmp->addr;
|
|
|
|
}
|
|
|
|
if (start < end) {
|
|
|
|
vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
|
|
|
|
if (!vmm)
|
|
|
|
return 0;
|
|
|
|
vmm->addr = start;
|
|
|
|
vmm->size = end - start;
|
|
|
|
vmm->type = KCORE_VMEMMAP;
|
|
|
|
list_add_tail(&vmm->list, head);
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
#else
|
2012-03-24 06:02:52 +08:00
|
|
|
/* No SPARSEMEM_VMEMMAP: there is nothing to register, so always report success (1). */
static int get_sparsemem_vmemmap_info(struct kcore_list *ent,
				      struct list_head *head)
{
	return 1;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2009-09-23 07:45:48 +08:00
|
|
|
static int
|
|
|
|
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
|
|
|
|
{
|
|
|
|
struct list_head *head = (struct list_head *)arg;
|
|
|
|
struct kcore_list *ent;
|
|
|
|
|
|
|
|
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
|
|
|
|
if (!ent)
|
|
|
|
return -ENOMEM;
|
|
|
|
ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
|
|
|
|
ent->size = nr_pages << PAGE_SHIFT;
|
|
|
|
|
|
|
|
/* Sanity check: Can happen in 32bit arch...maybe */
|
|
|
|
if (ent->addr < (unsigned long) __va(0))
|
|
|
|
goto free_out;
|
|
|
|
|
|
|
|
/* cut not-mapped area. ....from ppc-32 code. */
|
|
|
|
if (ULONG_MAX - ent->addr < ent->size)
|
|
|
|
ent->size = ULONG_MAX - ent->addr;
|
|
|
|
|
|
|
|
/* cut when vmalloc() area is higher than direct-map area */
|
|
|
|
if (VMALLOC_START > (unsigned long)__va(0)) {
|
|
|
|
if (ent->addr > VMALLOC_START)
|
|
|
|
goto free_out;
|
|
|
|
if (VMALLOC_START - ent->addr < ent->size)
|
|
|
|
ent->size = VMALLOC_START - ent->addr;
|
|
|
|
}
|
|
|
|
|
|
|
|
ent->type = KCORE_RAM;
|
|
|
|
list_add_tail(&ent->list, head);
|
2009-09-23 07:45:49 +08:00
|
|
|
|
|
|
|
if (!get_sparsemem_vmemmap_info(ent, head)) {
|
|
|
|
list_del(&ent->list);
|
|
|
|
goto free_out;
|
|
|
|
}
|
|
|
|
|
2009-09-23 07:45:48 +08:00
|
|
|
return 0;
|
|
|
|
free_out:
|
|
|
|
kfree(ent);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kcore_update_ram(void)
|
|
|
|
{
|
|
|
|
int nid, ret;
|
|
|
|
unsigned long end_pfn;
|
|
|
|
LIST_HEAD(head);
|
|
|
|
|
|
|
|
/* Not inialized....update now */
|
|
|
|
/* find out "max pfn" */
|
|
|
|
end_pfn = 0;
|
2012-12-13 05:51:25 +08:00
|
|
|
for_each_node_state(nid, N_MEMORY) {
|
2009-09-23 07:45:48 +08:00
|
|
|
unsigned long node_end;
|
2013-11-13 07:07:19 +08:00
|
|
|
node_end = node_end_pfn(nid);
|
2009-09-23 07:45:48 +08:00
|
|
|
if (end_pfn < node_end)
|
|
|
|
end_pfn = node_end;
|
|
|
|
}
|
|
|
|
/* scan 0 to max_pfn */
|
|
|
|
ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
|
|
|
|
if (ret) {
|
|
|
|
free_kclist_ents(&head);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
__kcore_update_ram(&head);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_HIGHMEM */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* determine size of ELF note
|
|
|
|
*/
|
|
|
|
static int notesize(struct memelfnote *en)
|
|
|
|
{
|
|
|
|
int sz;
|
|
|
|
|
|
|
|
sz = sizeof(struct elf_note);
|
2006-09-29 17:01:45 +08:00
|
|
|
sz += roundup((strlen(en->name) + 1), 4);
|
2005-04-17 06:20:36 +08:00
|
|
|
sz += roundup(en->datasz, 4);
|
|
|
|
|
|
|
|
return sz;
|
|
|
|
} /* end notesize() */
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* store a note in the header buffer
|
|
|
|
*/
|
|
|
|
static char *storenote(struct memelfnote *men, char *bufp)
|
|
|
|
{
|
|
|
|
struct elf_note en;
|
|
|
|
|
|
|
|
#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
|
|
|
|
|
2006-09-29 17:01:45 +08:00
|
|
|
en.n_namesz = strlen(men->name) + 1;
|
2005-04-17 06:20:36 +08:00
|
|
|
en.n_descsz = men->datasz;
|
|
|
|
en.n_type = men->type;
|
|
|
|
|
|
|
|
DUMP_WRITE(&en, sizeof(en));
|
|
|
|
DUMP_WRITE(men->name, en.n_namesz);
|
|
|
|
|
|
|
|
/* XXX - cast from long long to long to avoid need for libgcc.a */
|
|
|
|
bufp = (char*) roundup((unsigned long)bufp,4);
|
|
|
|
DUMP_WRITE(men->data, men->datasz);
|
|
|
|
bufp = (char*) roundup((unsigned long)bufp,4);
|
|
|
|
|
|
|
|
#undef DUMP_WRITE
|
|
|
|
|
|
|
|
return bufp;
|
|
|
|
} /* end storenote() */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* store an ELF coredump header in the supplied buffer
|
|
|
|
* nphdr is the number of elf_phdr to insert
|
|
|
|
*/
|
|
|
|
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
|
|
|
|
{
|
|
|
|
struct elf_prstatus prstatus; /* NT_PRSTATUS */
|
|
|
|
struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
|
|
|
|
struct elf_phdr *nhdr, *phdr;
|
|
|
|
struct elfhdr *elf;
|
|
|
|
struct memelfnote notes[3];
|
|
|
|
off_t offset = 0;
|
|
|
|
struct kcore_list *m;
|
|
|
|
|
|
|
|
/* setup ELF header */
|
|
|
|
elf = (struct elfhdr *) bufp;
|
|
|
|
bufp += sizeof(struct elfhdr);
|
|
|
|
offset += sizeof(struct elfhdr);
|
|
|
|
memcpy(elf->e_ident, ELFMAG, SELFMAG);
|
|
|
|
elf->e_ident[EI_CLASS] = ELF_CLASS;
|
|
|
|
elf->e_ident[EI_DATA] = ELF_DATA;
|
|
|
|
elf->e_ident[EI_VERSION]= EV_CURRENT;
|
|
|
|
elf->e_ident[EI_OSABI] = ELF_OSABI;
|
|
|
|
memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
|
|
|
|
elf->e_type = ET_CORE;
|
|
|
|
elf->e_machine = ELF_ARCH;
|
|
|
|
elf->e_version = EV_CURRENT;
|
|
|
|
elf->e_entry = 0;
|
|
|
|
elf->e_phoff = sizeof(struct elfhdr);
|
|
|
|
elf->e_shoff = 0;
|
2008-07-25 16:48:10 +08:00
|
|
|
elf->e_flags = ELF_CORE_EFLAGS;
|
2005-04-17 06:20:36 +08:00
|
|
|
elf->e_ehsize = sizeof(struct elfhdr);
|
|
|
|
elf->e_phentsize= sizeof(struct elf_phdr);
|
|
|
|
elf->e_phnum = nphdr;
|
|
|
|
elf->e_shentsize= 0;
|
|
|
|
elf->e_shnum = 0;
|
|
|
|
elf->e_shstrndx = 0;
|
|
|
|
|
|
|
|
/* setup ELF PT_NOTE program header */
|
|
|
|
nhdr = (struct elf_phdr *) bufp;
|
|
|
|
bufp += sizeof(struct elf_phdr);
|
|
|
|
offset += sizeof(struct elf_phdr);
|
|
|
|
nhdr->p_type = PT_NOTE;
|
|
|
|
nhdr->p_offset = 0;
|
|
|
|
nhdr->p_vaddr = 0;
|
|
|
|
nhdr->p_paddr = 0;
|
|
|
|
nhdr->p_filesz = 0;
|
|
|
|
nhdr->p_memsz = 0;
|
|
|
|
nhdr->p_flags = 0;
|
|
|
|
nhdr->p_align = 0;
|
|
|
|
|
|
|
|
/* setup ELF PT_LOAD program header for every area */
|
2009-09-23 07:45:41 +08:00
|
|
|
list_for_each_entry(m, &kclist_head, list) {
|
2005-04-17 06:20:36 +08:00
|
|
|
phdr = (struct elf_phdr *) bufp;
|
|
|
|
bufp += sizeof(struct elf_phdr);
|
|
|
|
offset += sizeof(struct elf_phdr);
|
|
|
|
|
|
|
|
phdr->p_type = PT_LOAD;
|
|
|
|
phdr->p_flags = PF_R|PF_W|PF_X;
|
|
|
|
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
|
|
|
|
phdr->p_vaddr = (size_t)m->addr;
|
/proc/kcore: update physical address for kcore ram and text
Currently all the p_paddr of PT_LOAD headers are assigned to 0, which is
not true and could be misleading, since 0 is a valid physical address.
User space tools like makedumpfile needs to know physical address for
PT_LOAD segments of direct mapped regions. Therefore this patch updates
paddr for such regions. It also sets an invalid paddr (-1) for other
regions, so that user space tool can know whether a physical address
provided in PT_LOAD is correct or not.
I do not know why it was 0, which is a valid physical address. But
certainly, it might break some user space tools, and those need to be
fixed. For example, see following code from kexec-tools
kexec/kexec-elf.c:build_mem_phdrs()
if ((phdr->p_paddr + phdr->p_memsz) < phdr->p_paddr) {
/* The memory address wraps */
if (probe_debug) {
fprintf(stderr, "ELF address wrap around\n");
}
return -1;
}
We do not need to perform above check for an invalid physical address.
I think, kexec-tools and makedumpfile will need fixup. I already have
those fixup which will be sent upstream once this patch makes through.
Pro with this approach is that, it will help to calculate variable like
page_offset, phys_base from PT_LOAD even when they are randomized and
therefore will reduce many variable and version specific values in user
space tools.
Having an ASLR offset information can help to translate an identity
mapped virtual address to a physical address. But that would be an
additional field in PT_LOAD header structure and an arch dependent
value.
Moreover, sending a valid physical address like 0 does not seem right.
So, IMHO it is better to fix that and send valid physical address when
available (identity mapped).
Link: http://lkml.kernel.org/r/f951340d2917cdd2a329fae9837a83f2059dc3b2.1485318868.git.panand@redhat.com
Signed-off-by: Pratyush Anand <panand@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dave Anderson <anderson@redhat.com>
Cc: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Cc: Simon Horman <simon.horman@netronome.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-02-28 06:27:31 +08:00
|
|
|
if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
|
|
|
|
phdr->p_paddr = __pa(m->addr);
|
|
|
|
else
|
|
|
|
phdr->p_paddr = (elf_addr_t)-1;
|
2005-04-17 06:20:36 +08:00
|
|
|
phdr->p_filesz = phdr->p_memsz = m->size;
|
|
|
|
phdr->p_align = PAGE_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up the notes in similar form to SVR4 core dumps made
|
|
|
|
* with info from their /proc.
|
|
|
|
*/
|
|
|
|
nhdr->p_offset = offset;
|
|
|
|
|
|
|
|
/* set up the process status */
|
2006-12-07 12:38:00 +08:00
|
|
|
notes[0].name = CORE_STR;
|
2005-04-17 06:20:36 +08:00
|
|
|
notes[0].type = NT_PRSTATUS;
|
|
|
|
notes[0].datasz = sizeof(struct elf_prstatus);
|
|
|
|
notes[0].data = &prstatus;
|
|
|
|
|
|
|
|
memset(&prstatus, 0, sizeof(struct elf_prstatus));
|
|
|
|
|
|
|
|
nhdr->p_filesz = notesize(¬es[0]);
|
|
|
|
bufp = storenote(¬es[0], bufp);
|
|
|
|
|
|
|
|
/* set up the process info */
|
2006-12-07 12:38:00 +08:00
|
|
|
notes[1].name = CORE_STR;
|
2005-04-17 06:20:36 +08:00
|
|
|
notes[1].type = NT_PRPSINFO;
|
|
|
|
notes[1].datasz = sizeof(struct elf_prpsinfo);
|
|
|
|
notes[1].data = &prpsinfo;
|
|
|
|
|
|
|
|
memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
|
|
|
|
prpsinfo.pr_state = 0;
|
|
|
|
prpsinfo.pr_sname = 'R';
|
|
|
|
prpsinfo.pr_zomb = 0;
|
|
|
|
|
|
|
|
strcpy(prpsinfo.pr_fname, "vmlinux");
|
2013-07-04 06:08:28 +08:00
|
|
|
strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
nhdr->p_filesz += notesize(¬es[1]);
|
|
|
|
bufp = storenote(¬es[1], bufp);
|
|
|
|
|
|
|
|
/* set up the task structure */
|
2006-12-07 12:38:00 +08:00
|
|
|
notes[2].name = CORE_STR;
|
2005-04-17 06:20:36 +08:00
|
|
|
notes[2].type = NT_TASKSTRUCT;
|
2015-07-17 18:28:12 +08:00
|
|
|
notes[2].datasz = arch_task_struct_size;
|
2005-04-17 06:20:36 +08:00
|
|
|
notes[2].data = current;
|
|
|
|
|
|
|
|
nhdr->p_filesz += notesize(¬es[2]);
|
|
|
|
bufp = storenote(¬es[2], bufp);
|
|
|
|
|
|
|
|
} /* end elf_kcore_store_hdr() */
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* read from the ELF header and then kernel memory
|
|
|
|
*/
|
|
|
|
static ssize_t
|
|
|
|
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
|
|
|
|
{
|
2016-09-08 15:57:07 +08:00
|
|
|
char *buf = file->private_data;
|
2005-04-17 06:20:36 +08:00
|
|
|
ssize_t acc = 0;
|
|
|
|
size_t size, tsz;
|
|
|
|
size_t elf_buflen;
|
|
|
|
int nphdr;
|
|
|
|
unsigned long start;
|
|
|
|
|
|
|
|
read_lock(&kclist_lock);
|
2009-09-23 07:45:51 +08:00
|
|
|
size = get_kcore_size(&nphdr, &elf_buflen);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
if (buflen == 0 || *fpos >= size) {
|
|
|
|
read_unlock(&kclist_lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* trim buflen to not go beyond EOF */
|
|
|
|
if (buflen > size - *fpos)
|
|
|
|
buflen = size - *fpos;
|
|
|
|
|
|
|
|
/* construct an ELF core header if we'll need some of it */
|
|
|
|
if (*fpos < elf_buflen) {
|
|
|
|
char * elf_buf;
|
|
|
|
|
|
|
|
tsz = elf_buflen - *fpos;
|
|
|
|
if (buflen < tsz)
|
|
|
|
tsz = buflen;
|
2006-09-27 16:49:37 +08:00
|
|
|
elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!elf_buf) {
|
|
|
|
read_unlock(&kclist_lock);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
|
|
|
|
read_unlock(&kclist_lock);
|
|
|
|
if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
|
|
|
|
kfree(elf_buf);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
kfree(elf_buf);
|
|
|
|
buflen -= tsz;
|
|
|
|
*fpos += tsz;
|
|
|
|
buffer += tsz;
|
|
|
|
acc += tsz;
|
|
|
|
|
|
|
|
/* leave now if filled buffer already */
|
|
|
|
if (buflen == 0)
|
|
|
|
return acc;
|
|
|
|
} else
|
|
|
|
read_unlock(&kclist_lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if our file offset matches with any of
|
|
|
|
* the addresses in the elf_phdr on our list.
|
|
|
|
*/
|
|
|
|
start = kc_offset_to_vaddr(*fpos - elf_buflen);
|
|
|
|
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
|
|
|
|
tsz = buflen;
|
|
|
|
|
|
|
|
while (buflen) {
|
|
|
|
struct kcore_list *m;
|
|
|
|
|
|
|
|
read_lock(&kclist_lock);
|
2009-09-23 07:45:41 +08:00
|
|
|
list_for_each_entry(m, &kclist_head, list) {
|
2005-04-17 06:20:36 +08:00
|
|
|
if (start >= m->addr && start < (m->addr+m->size))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
read_unlock(&kclist_lock);
|
|
|
|
|
2010-03-24 04:35:42 +08:00
|
|
|
if (&m->list == &kclist_head) {
|
2005-04-17 06:20:36 +08:00
|
|
|
if (clear_user(buffer, tsz))
|
|
|
|
return -EFAULT;
|
2009-09-23 07:45:49 +08:00
|
|
|
} else if (is_vmalloc_or_module_addr((void *)start)) {
|
2016-09-08 15:57:07 +08:00
|
|
|
vread(buf, (char *)start, tsz);
|
2009-09-22 08:02:35 +08:00
|
|
|
/* we have to zero-fill user buffer even if no read */
|
2016-09-08 15:57:07 +08:00
|
|
|
if (copy_to_user(buffer, buf, tsz))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
} else {
|
|
|
|
if (kern_addr_valid(start)) {
|
|
|
|
unsigned long n;
|
|
|
|
|
2016-09-08 15:57:08 +08:00
|
|
|
/*
|
|
|
|
* Using bounce buffer to bypass the
|
|
|
|
* hardened user copy kernel text checks.
|
|
|
|
*/
|
|
|
|
memcpy(buf, (char *) start, tsz);
|
|
|
|
n = copy_to_user(buffer, buf, tsz);
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2012-03-19 21:41:15 +08:00
|
|
|
* We cannot distinguish between fault on source
|
2005-04-17 06:20:36 +08:00
|
|
|
* and fault on destination. When this happens
|
|
|
|
* we clear too and hope it will trigger the
|
|
|
|
* EFAULT again.
|
|
|
|
*/
|
|
|
|
if (n) {
|
|
|
|
if (clear_user(buffer + tsz - n,
|
2006-07-13 00:03:07 +08:00
|
|
|
n))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (clear_user(buffer, tsz))
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buflen -= tsz;
|
|
|
|
*fpos += tsz;
|
|
|
|
buffer += tsz;
|
|
|
|
acc += tsz;
|
|
|
|
start += tsz;
|
|
|
|
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
|
|
|
|
}
|
|
|
|
|
|
|
|
return acc;
|
|
|
|
}
|
2008-10-06 18:14:19 +08:00
|
|
|
|
2009-09-23 07:45:48 +08:00
|
|
|
|
|
|
|
static int open_kcore(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
if (!capable(CAP_SYS_RAWIO))
|
|
|
|
return -EPERM;
|
2016-09-08 15:57:07 +08:00
|
|
|
|
|
|
|
filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
|
|
|
if (!filp->private_data)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2009-09-23 07:45:48 +08:00
|
|
|
if (kcore_need_update)
|
|
|
|
kcore_update_ram();
|
2009-09-23 07:45:52 +08:00
|
|
|
if (i_size_read(inode) != proc_root_kcore->size) {
|
2016-01-23 04:40:57 +08:00
|
|
|
inode_lock(inode);
|
2009-09-23 07:45:52 +08:00
|
|
|
i_size_write(inode, proc_root_kcore->size);
|
2016-01-23 04:40:57 +08:00
|
|
|
inode_unlock(inode);
|
2009-09-23 07:45:52 +08:00
|
|
|
}
|
2009-09-23 07:45:48 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-08 15:57:07 +08:00
|
|
|
static int release_kcore(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
kfree(file->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
2009-09-23 07:45:48 +08:00
|
|
|
|
|
|
|
static const struct file_operations proc_kcore_operations = {
|
|
|
|
.read = read_kcore,
|
|
|
|
.open = open_kcore,
|
2016-09-08 15:57:07 +08:00
|
|
|
.release = release_kcore,
|
2011-01-13 09:00:36 +08:00
|
|
|
.llseek = default_llseek,
|
2009-09-23 07:45:48 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* just remember that we have to update kcore */
|
|
|
|
static int __meminit kcore_callback(struct notifier_block *self,
|
|
|
|
unsigned long action, void *arg)
|
|
|
|
{
|
|
|
|
switch (action) {
|
|
|
|
case MEM_ONLINE:
|
|
|
|
case MEM_OFFLINE:
|
|
|
|
write_lock(&kclist_lock);
|
|
|
|
kcore_need_update = 1;
|
|
|
|
write_unlock(&kclist_lock);
|
|
|
|
}
|
|
|
|
return NOTIFY_OK;
|
|
|
|
}
|
|
|
|
|
2013-04-30 06:08:08 +08:00
|
|
|
/* Notifier block handed to register_hotmemory_notifier() in proc_kcore_init(). */
static struct notifier_block kcore_callback_nb __meminitdata = {
	.notifier_call	= kcore_callback,
	.priority	= 0,
};
|
2009-09-23 07:45:48 +08:00
|
|
|
|
2009-09-23 07:45:44 +08:00
|
|
|
/* List entry for the vmalloc area [VMALLOC_START, VMALLOC_END), registered in proc_kcore_init(). */
static struct kcore_list kcore_vmalloc;
|
|
|
|
|
2009-09-23 07:45:45 +08:00
|
|
|
#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
|
|
|
|
static struct kcore_list kcore_text;
|
|
|
|
/*
|
|
|
|
* If defined, special segment is used for mapping kernel text instead of
|
|
|
|
* direct-map area. We need to create special TEXT section.
|
|
|
|
*/
|
|
|
|
static void __init proc_kcore_text_init(void)
|
|
|
|
{
|
kcore: add _text to KCORE_TEXT
Extend KCORE_TEXT to cover the pages between _text and _stext, to allow
examining some important page table pages.
`readelf -a` output on x86_64 before and after patch:
Type Offset VirtAddr PhysAddr
before LOAD 0x00007fff8100c000 0xffffffff81009000 0x0000000000000000
after LOAD 0x00007fff81003000 0xffffffff81000000 0x0000000000000000
The newly covered pages are:
0xffffffff81000000 <startup_64> etc.
0xffffffff81001000 <init_level4_pgt>
0xffffffff81002000 <level3_ident_pgt>
0xffffffff81003000 <level3_kernel_pgt>
0xffffffff81004000 <level2_fixmap_pgt>
0xffffffff81005000 <level1_fixmap_pgt>
0xffffffff81006000 <level2_ident_pgt>
0xffffffff81007000 <level2_kernel_pgt>
0xffffffff81008000 <level2_spare_pgt>
Before patch, /proc/kcore shows outdated contents for the above page
table pages, for example:
(gdb) p level3_ident_pgt
$1 = {<text variable, no debug info>} 0xffffffff81002000 <level3_ident_pgt>
(gdb) p/x *((pud_t *)&level3_ident_pgt)@512
$2 = {{pud = 0x1006063}, {pud = 0x0} <repeats 511 times>}
while the real content is:
root@hp /home/wfg# hexdump -s 0x1002000 -n 4096 /dev/mem
1002000 6063 0100 0000 0000 8067 0000 0000 0000
1002010 0000 0000 0000 0000 0000 0000 0000 0000
*
1003000
That is, on a x86_64 box with 2GB memory, we can see first-1GB / full-2GB
identity mapping before/after patch:
(gdb) p/x *((pud_t *)&level3_ident_pgt)@512
before $1 = {{pud = 0x1006063}, {pud = 0x0} <repeats 511 times>}
after $1 = {{pud = 0x1006063}, {pud = 0x8067}, {pud = 0x0} <repeats 510 times>}
Obviously the content before patch is wrong.
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-05-27 05:43:27 +08:00
|
|
|
kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
|
2009-09-23 07:45:45 +08:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
static void __init proc_kcore_text_init(void)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-09-23 07:45:49 +08:00
|
|
|
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
|
|
|
|
/*
|
|
|
|
* MODULES_VADDR has no intersection with VMALLOC_ADDR.
|
|
|
|
*/
|
|
|
|
struct kcore_list kcore_modules;
|
|
|
|
static void __init add_modules_range(void)
|
|
|
|
{
|
2014-10-10 06:25:56 +08:00
|
|
|
if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
|
|
|
|
kclist_add(&kcore_modules, (void *)MODULES_VADDR,
|
2009-09-23 07:45:49 +08:00
|
|
|
MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
|
2014-10-10 06:25:56 +08:00
|
|
|
}
|
2009-09-23 07:45:49 +08:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
static void __init add_modules_range(void)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-10-06 18:14:19 +08:00
|
|
|
/*
 * Create /proc/kcore and register the initial set of regions: the special
 * text mapping (if any), the vmalloc area, the module area (if separate) and
 * the RAM direct map.  Finally hook into memory hotplug so the RAM entries
 * are refreshed after memory goes online or offline.
 *
 * Always returns 0; failure to create the proc entry is only logged.
 */
static int __init proc_kcore_init(void)
{
	void *vmalloc_base = (void *)VMALLOC_START;

	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
				      &proc_kcore_operations);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0; /* Always returns 0. */
	}

	/* text area, if it is mapped specially */
	proc_kcore_text_init();

	/* vmalloc area, then the module area where it is distinct */
	kclist_add(&kcore_vmalloc, vmalloc_base,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();

	/* direct-map area, taken from the physical memory map */
	kcore_update_ram();
	register_hotmemory_notifier(&kcore_callback_nb);

	return 0;
}
|
2014-01-24 07:55:45 +08:00
|
|
|
/* Register proc_kcore_init() as an fs_initcall. */
fs_initcall(proc_kcore_init);
|