OpenCloudOS-Kernel/fs/pstore/platform.c

317 lines
7.6 KiB
C
Raw Normal View History

/*
* Persistent Storage - platform driver interface parts.
*
* Copyright (C) 2007-2008 Google, Inc.
* Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kmsg_dump.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/pstore.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
#include "internal.h"
/*
* We defer making "oops" entries appear in pstore - see
* whether the system is actually still running well enough
* to let someone see the entry
*/
pstore/platform: Disable automatic updates by default Having automatic updates seems pointless for production system, and even dangerous and thus counter-productive: 1. If we can mount pstore, or read files, we can as well read /proc/kmsg. So, there's little point in duplicating the functionality and present the same information but via another userland ABI; 2. Expecting the kernel to behave sanely after oops/panic is naive. It might work, but you'd rather not try it. Screwed up kernel can do rather bad things, like recursive faults[1]; and pstore rather provoking bad things to happen. It uses: 1. Timers (assumes sane interrupts state); 2. Workqueues and mutexes (assumes scheduler in a sane state); 3. kzalloc (a working slab allocator); That's too much for a dead kernel, so the debugging facility itself might just make debugging harder, which is not what we want. Maybe for non-oops message types it would make sense to re-enable automatic updates, but so far I don't see any use case for this. Even for tracing, it has its own run-time/normal ABI, so we're only interested in pstore upon next boot, to retrieve what has gone wrong with HW or SW. So, let's disable the updates by default. [1] BUG: unable to handle kernel paging request at fffffffffffffff8 IP: [<ffffffff8104801b>] kthread_data+0xb/0x20 [...] Process kworker/0:1 (pid: 14, threadinfo ffff8800072c0000, task ffff88000725b100) [... Call Trace: [<ffffffff81043710>] wq_worker_sleeping+0x10/0xa0 [<ffffffff813687a8>] __schedule+0x568/0x7d0 [<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10 [<ffffffff81087e22>] ? call_rcu_sched+0x12/0x20 [<ffffffff8102b596>] ? release_task+0x156/0x2d0 [<ffffffff8102b45e>] ? release_task+0x1e/0x2d0 [<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10 [<ffffffff81368ac4>] schedule+0x24/0x70 [<ffffffff8102cba8>] do_exit+0x1f8/0x370 [<ffffffff810051e7>] oops_end+0x77/0xb0 [<ffffffff8135c301>] no_context+0x1a6/0x1b5 [<ffffffff8135c4de>] __bad_area_nosemaphore+0x1ce/0x1ed [<ffffffff81053156>] ? ttwu_queue+0xc6/0xe0 [<ffffffff8135c50b>] bad_area_nosemaphore+0xe/0x10 [<ffffffff8101fa47>] do_page_fault+0x2c7/0x450 [<ffffffff8106e34b>] ? __lock_release+0x6b/0xe0 [<ffffffff8106bf21>] ? mark_held_locks+0x61/0x140 [<ffffffff810502fe>] ? __wake_up+0x4e/0x70 [<ffffffff81185f7d>] ? trace_hardirqs_off_thunk+0x3a/0x3c [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff8136a37f>] page_fault+0x1f/0x30 [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff81185ab8>] ? memcpy+0x68/0x110 [<ffffffff8115875a>] ? pstore_get_records+0x3a/0x130 [<ffffffff811590f4>] ? persistent_ram_copy_old+0x64/0x90 [<ffffffff81158bf4>] ramoops_pstore_read+0x84/0x130 [<ffffffff81158799>] pstore_get_records+0x79/0x130 [<ffffffff81042536>] ? process_one_work+0x116/0x450 [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff8115897e>] pstore_dowork+0xe/0x10 [<ffffffff81042594>] process_one_work+0x174/0x450 [<ffffffff81042536>] ? process_one_work+0x116/0x450 [<ffffffff81042e13>] worker_thread+0x123/0x2d0 [<ffffffff81042cf0>] ? manage_workers.isra.28+0x120/0x120 [<ffffffff81047d8e>] kthread+0x8e/0xa0 [<ffffffff8136ba74>] kernel_thread_helper+0x4/0x10 [<ffffffff8136a199>] ? retint_restore_args+0xe/0xe [<ffffffff81047d00>] ? __init_kthread_worker+0x70/0x70 [<ffffffff8136ba70>] ? gs_change+0xb/0xb Code: be e2 00 00 00 48 c7 c7 d1 2a 4e 81 e8 bf fb fd ff 48 8b 5d f0 4c 8b 65 f8 c9 c3 0f 1f 44 00 00 48 8b 87 08 02 00 00 55 48 89 e5 <48> 8b 40 f8 5d c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [<ffffffff8104801b>] kthread_data+0xb/0x20 RSP <ffff8800072c1888> CR2: fffffffffffffff8 ---[ end trace 996a332dc399111d ]--- Fixing recursive fault but reboot is needed! Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2012-05-26 21:20:29 +08:00
static int pstore_update_ms = -1;
module_param_named(update_ms, pstore_update_ms, int, 0600);
MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content "
pstore/platform: Disable automatic updates by default Having automatic updates seems pointless for production system, and even dangerous and thus counter-productive: 1. If we can mount pstore, or read files, we can as well read /proc/kmsg. So, there's little point in duplicating the functionality and present the same information but via another userland ABI; 2. Expecting the kernel to behave sanely after oops/panic is naive. It might work, but you'd rather not try it. Screwed up kernel can do rather bad things, like recursive faults[1]; and pstore rather provoking bad things to happen. It uses: 1. Timers (assumes sane interrupts state); 2. Workqueues and mutexes (assumes scheduler in a sane state); 3. kzalloc (a working slab allocator); That's too much for a dead kernel, so the debugging facility itself might just make debugging harder, which is not what we want. Maybe for non-oops message types it would make sense to re-enable automatic updates, but so far I don't see any use case for this. Even for tracing, it has its own run-time/normal ABI, so we're only interested in pstore upon next boot, to retrieve what has gone wrong with HW or SW. So, let's disable the updates by default. [1] BUG: unable to handle kernel paging request at fffffffffffffff8 IP: [<ffffffff8104801b>] kthread_data+0xb/0x20 [...] Process kworker/0:1 (pid: 14, threadinfo ffff8800072c0000, task ffff88000725b100) [... Call Trace: [<ffffffff81043710>] wq_worker_sleeping+0x10/0xa0 [<ffffffff813687a8>] __schedule+0x568/0x7d0 [<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10 [<ffffffff81087e22>] ? call_rcu_sched+0x12/0x20 [<ffffffff8102b596>] ? release_task+0x156/0x2d0 [<ffffffff8102b45e>] ? release_task+0x1e/0x2d0 [<ffffffff8106c24d>] ? trace_hardirqs_on+0xd/0x10 [<ffffffff81368ac4>] schedule+0x24/0x70 [<ffffffff8102cba8>] do_exit+0x1f8/0x370 [<ffffffff810051e7>] oops_end+0x77/0xb0 [<ffffffff8135c301>] no_context+0x1a6/0x1b5 [<ffffffff8135c4de>] __bad_area_nosemaphore+0x1ce/0x1ed [<ffffffff81053156>] ? ttwu_queue+0xc6/0xe0 [<ffffffff8135c50b>] bad_area_nosemaphore+0xe/0x10 [<ffffffff8101fa47>] do_page_fault+0x2c7/0x450 [<ffffffff8106e34b>] ? __lock_release+0x6b/0xe0 [<ffffffff8106bf21>] ? mark_held_locks+0x61/0x140 [<ffffffff810502fe>] ? __wake_up+0x4e/0x70 [<ffffffff81185f7d>] ? trace_hardirqs_off_thunk+0x3a/0x3c [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff8136a37f>] page_fault+0x1f/0x30 [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff81185ab8>] ? memcpy+0x68/0x110 [<ffffffff8115875a>] ? pstore_get_records+0x3a/0x130 [<ffffffff811590f4>] ? persistent_ram_copy_old+0x64/0x90 [<ffffffff81158bf4>] ramoops_pstore_read+0x84/0x130 [<ffffffff81158799>] pstore_get_records+0x79/0x130 [<ffffffff81042536>] ? process_one_work+0x116/0x450 [<ffffffff81158970>] ? pstore_register+0x120/0x120 [<ffffffff8115897e>] pstore_dowork+0xe/0x10 [<ffffffff81042594>] process_one_work+0x174/0x450 [<ffffffff81042536>] ? process_one_work+0x116/0x450 [<ffffffff81042e13>] worker_thread+0x123/0x2d0 [<ffffffff81042cf0>] ? manage_workers.isra.28+0x120/0x120 [<ffffffff81047d8e>] kthread+0x8e/0xa0 [<ffffffff8136ba74>] kernel_thread_helper+0x4/0x10 [<ffffffff8136a199>] ? retint_restore_args+0xe/0xe [<ffffffff81047d00>] ? __init_kthread_worker+0x70/0x70 [<ffffffff8136ba70>] ? gs_change+0xb/0xb Code: be e2 00 00 00 48 c7 c7 d1 2a 4e 81 e8 bf fb fd ff 48 8b 5d f0 4c 8b 65 f8 c9 c3 0f 1f 44 00 00 48 8b 87 08 02 00 00 55 48 89 e5 <48> 8b 40 f8 5d c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [<ffffffff8104801b>] kthread_data+0xb/0x20 RSP <ffff8800072c1888> CR2: fffffffffffffff8 ---[ end trace 996a332dc399111d ]--- Fixing recursive fault but reboot is needed! Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2012-05-26 21:20:29 +08:00
"(default is -1, which means runtime updates are disabled; "
"enabling this option is not safe, it may lead to further "
"corruption on Oopses)");
static int pstore_new_entry;
static void pstore_timefunc(unsigned long);
static DEFINE_TIMER(pstore_timer, pstore_timefunc, 0, 0);
static void pstore_dowork(struct work_struct *);
static DECLARE_WORK(pstore_work, pstore_dowork);
/*
* pstore_lock just protects "psinfo" during
* calls to pstore_register()
*/
static DEFINE_SPINLOCK(pstore_lock);
struct pstore_info *psinfo;
static char *backend;
/* How much of the console log to snapshot */
static unsigned long kmsg_bytes = 10240;
void pstore_set_kmsg_bytes(int bytes)
{
kmsg_bytes = bytes;
}
/* Tag each group of saved records with a sequence number */
static int oopscount;
static const char *get_reason_str(enum kmsg_dump_reason reason)
{
switch (reason) {
case KMSG_DUMP_PANIC:
return "Panic";
case KMSG_DUMP_OOPS:
return "Oops";
case KMSG_DUMP_EMERG:
return "Emergency";
case KMSG_DUMP_RESTART:
return "Restart";
case KMSG_DUMP_HALT:
return "Halt";
case KMSG_DUMP_POWEROFF:
return "Poweroff";
default:
return "Unknown";
}
}
/*
* callback from kmsg_dump. (s2,l2) has the most recently
* written bytes, older bytes are in (s1,l1). Save as much
* as we can from the end of the buffer.
*/
static void pstore_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
unsigned long total = 0;
const char *why;
u64 id;
unsigned int part = 1;
unsigned long flags = 0;
int is_locked = 0;
int ret;
why = get_reason_str(reason);
if (in_nmi()) {
is_locked = spin_trylock(&psinfo->buf_lock);
if (!is_locked)
pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
} else
spin_lock_irqsave(&psinfo->buf_lock, flags);
oopscount++;
while (total < kmsg_bytes) {
char *dst;
unsigned long size;
int hsize;
size_t len;
dst = psinfo->buf;
hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
size = psinfo->bufsize - hsize;
dst += hsize;
if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len))
break;
ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 08:07:44 +08:00
oopscount, hsize + len, psinfo);
if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
pstore_new_entry = 1;
total += hsize + len;
part++;
}
if (in_nmi()) {
if (is_locked)
spin_unlock(&psinfo->buf_lock);
} else
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
static struct kmsg_dumper pstore_dumper = {
.dump = pstore_dump,
};
#ifdef CONFIG_PSTORE_CONSOLE
static void pstore_console_write(struct console *con, const char *s, unsigned c)
{
const char *e = s + c;
while (s < e) {
unsigned long flags;
u64 id;
if (c > psinfo->bufsize)
c = psinfo->bufsize;
if (oops_in_progress) {
if (!spin_trylock_irqsave(&psinfo->buf_lock, flags))
break;
} else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
memcpy(psinfo->buf, s, c);
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 08:07:44 +08:00
psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo);
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
s += c;
c = e - s;
}
}
static struct console pstore_console = {
.name = "pstore",
.write = pstore_console_write,
.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME,
.index = -1,
};
static void pstore_register_console(void)
{
register_console(&pstore_console);
}
#else
static void pstore_register_console(void) {}
#endif
static int pstore_write_compat(enum pstore_type_id type,
enum kmsg_dump_reason reason,
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 08:07:44 +08:00
u64 *id, unsigned int part, int count,
size_t size, struct pstore_info *psi)
{
return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi);
}
/*
* platform specific persistent storage driver registers with
* us here. If pstore is already mounted, call the platform
* read function right away to populate the file system. If not
* then the pstore mount code will call us later to fill out
* the file system.
*
* Register with kmsg_dump to save last part of console log on panic.
*/
int pstore_register(struct pstore_info *psi)
{
struct module *owner = psi->owner;
spin_lock(&pstore_lock);
if (psinfo) {
spin_unlock(&pstore_lock);
return -EBUSY;
}
if (backend && strcmp(backend, psi->name)) {
spin_unlock(&pstore_lock);
return -EINVAL;
}
if (!psi->write)
psi->write = pstore_write_compat;
psinfo = psi;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
mutex_init(&psinfo->read_mutex);
spin_unlock(&pstore_lock);
if (owner && !try_module_get(owner)) {
psinfo = NULL;
return -EINVAL;
}
if (pstore_is_mounted())
pstore_get_records(0);
kmsg_dump_register(&pstore_dumper);
pstore_register_console();
pstore_register_ftrace();
if (pstore_update_ms >= 0) {
pstore_timer.expires = jiffies +
msecs_to_jiffies(pstore_update_ms);
add_timer(&pstore_timer);
}
return 0;
}
EXPORT_SYMBOL_GPL(pstore_register);
/*
* Read all the records from the persistent store. Create
* files in our filesystem. Don't warn about -EEXIST errors
* when we are re-scanning the backing store looking to add new
* error records.
*/
void pstore_get_records(int quiet)
{
struct pstore_info *psi = psinfo;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
char *buf = NULL;
ssize_t size;
u64 id;
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 08:07:44 +08:00
int count;
enum pstore_type_id type;
struct timespec time;
int failed = 0, rc;
if (!psi)
return;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
mutex_lock(&psi->read_mutex);
if (psi->open && psi->open(psi))
goto out;
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 08:07:44 +08:00
while ((size = psi->read(&id, &type, &count, &time, &buf, psi)) > 0) {
rc = pstore_mkfile(type, psi->name, id, count, buf,
(size_t)size, time, psi);
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
kfree(buf);
buf = NULL;
if (rc && (rc != -EEXIST || !quiet))
failed++;
}
if (psi->close)
psi->close(psi);
out:
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
mutex_unlock(&psi->read_mutex);
if (failed)
printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
failed, psi->name);
}
static void pstore_dowork(struct work_struct *work)
{
pstore_get_records(1);
}
static void pstore_timefunc(unsigned long dummy)
{
if (pstore_new_entry) {
pstore_new_entry = 0;
schedule_work(&pstore_work);
}
mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
module_param(backend, charp, 0444);
MODULE_PARM_DESC(backend, "Pstore backend to use");