OpenCloudOS-Kernel/drivers/acpi/apei/erst.c

1216 lines
28 KiB
C
Raw Normal View History

/*
* APEI Error Record Serialization Table support
*
* ERST is a way provided by APEI to save and retrieve hardware error
* information to and from a persistent store.
*
* For more information about ERST, please refer to ACPI Specification
* version 4.0, section 17.4.
*
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/acpi.h>
#include <linux/uaccess.h>
#include <linux/cper.h>
#include <linux/nmi.h>
#include <linux/hardirq.h>
#include <linux/pstore.h>
x86/mm: Decouple <linux/vmalloc.h> from <asm/io.h> Nothing in <asm/io.h> uses anything from <linux/vmalloc.h>, so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include <linux/vmalloc.h> explicitly and relied on the implicit inclusion via <asm/io.h>. Also add: - <linux/init.h> to <linux/io.h> - <asm/pgtable_types> to <asm/io.h> ... which were two other implicit header file dependencies. Suggested-by: David Miller <davem@davemloft.net> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> [ Tidied up the changelog. ] Acked-by: David Miller <davem@davemloft.net> Acked-by: Takashi Iwai <tiwai@suse.de> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Acked-by: Vinod Koul <vinod.koul@intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Anton Vorontsov <anton@enomsg.org> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Colin Cross <ccross@android.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Haiyang Zhang <haiyangz@microsoft.com> Cc: James E.J. Bottomley <JBottomley@odin.com> Cc: Jaroslav Kysela <perex@perex.cz> Cc: K. Y. Srinivasan <kys@microsoft.com> Cc: Kees Cook <keescook@chromium.org> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Kristen Carlson Accardi <kristen@linux.intel.com> Cc: Len Brown <lenb@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Suma Ramars <sramars@cisco.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-02 17:01:38 +08:00
#include <linux/vmalloc.h>
#include <linux/mm.h> /* kvfree() */
#include <acpi/apei.h>
#include "apei-internal.h"
#undef pr_fmt
#define pr_fmt(fmt) "ERST: " fmt
/* ERST command status */
#define ERST_STATUS_SUCCESS 0x0
#define ERST_STATUS_NOT_ENOUGH_SPACE 0x1
#define ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x2
#define ERST_STATUS_FAILED 0x3
#define ERST_STATUS_RECORD_STORE_EMPTY 0x4
#define ERST_STATUS_RECORD_NOT_FOUND 0x5
#define ERST_TAB_ENTRY(tab) \
((struct acpi_whea_header *)((char *)(tab) + \
sizeof(struct acpi_table_erst)))
#define SPIN_UNIT 100 /* 100ns */
/* Firmware should respond within 1 milliseconds */
#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
#define FIRMWARE_MAX_STALL 50 /* 50us */
int erst_disable;
EXPORT_SYMBOL_GPL(erst_disable);
static struct acpi_table_erst *erst_tab;
/* ERST Error Log Address Range atrributes */
#define ERST_RANGE_RESERVED 0x0001
#define ERST_RANGE_NVRAM 0x0002
#define ERST_RANGE_SLOW 0x0004
/*
 * ERST Error Log Address Range, used as buffer for reading/writing
 * error records.
 *
 * base, size and attr are what erst_get_erange() fills in from the
 * GET_ERROR_RANGE, GET_ERROR_LENGTH and GET_ERROR_ATTRIBUTES actions.
 */
static struct erst_erange {
/* start address of the range as reported by firmware */
u64 base;
/* length of the range; erst_write() rejects records longer than this */
u64 size;
/*
 * address through which records are copied in and out with memcpy();
 * NOTE(review): presumably a mapping of [base, base + size) set up
 * outside the code visible here -- confirm
 */
void __iomem *vaddr;
/* ERST_RANGE_* attribute bits */
u32 attr;
} erst_erange;
/*
 * Prevent the ERST interpreter from running simultaneously, because
 * the corresponding firmware implementation may not work properly
 * when invoked concurrently.
 *
 * It also provides exclusive access to the ERST Error Log Address
 * Range.
 */
static DEFINE_RAW_SPINLOCK(erst_lock);
/*
 * Translate an ERST command status code into 0 or a negative errno.
 * Any status without a dedicated mapping (including
 * ERST_STATUS_FAILED) yields -EINVAL.
 */
static inline int erst_errno(int command_status)
{
	int err = -EINVAL;

	switch (command_status) {
	case ERST_STATUS_SUCCESS:
		err = 0;
		break;
	case ERST_STATUS_NOT_ENOUGH_SPACE:
		err = -ENOSPC;
		break;
	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
		err = -ENODEV;
		break;
	case ERST_STATUS_RECORD_STORE_EMPTY:
	case ERST_STATUS_RECORD_NOT_FOUND:
		err = -ENOENT;
		break;
	default:
		break;
	}

	return err;
}
/*
 * Consume one polling step of @spin_unit nanoseconds from the budget
 * in *@t.
 *
 * Returns 0 after delaying for @spin_unit ns and touching the NMI
 * watchdog, or 1 (after warning) once the remaining budget is smaller
 * than one step.
 */
static int erst_timedout(u64 *t, u64 spin_unit)
{
	if ((s64)*t >= spin_unit) {
		*t -= spin_unit;
		ndelay(spin_unit);
		touch_nmi_watchdog();
		return 0;
	}

	pr_warn(FW_WARN "Firmware does not respond in time.\n");
	return 1;
}
/*
 * LOAD_VAR1: fill ctx->var1 through __apei_exec_read_register() for
 * @entry and hand back that helper's return code.
 */
static int erst_exec_load_var1(struct apei_exec_context *ctx,
			       struct acpi_whea_header *entry)
{
	int rc;

	rc = __apei_exec_read_register(entry, &ctx->var1);

	return rc;
}
/*
 * LOAD_VAR2: fill ctx->var2 through __apei_exec_read_register() for
 * @entry and hand back that helper's return code.
 */
static int erst_exec_load_var2(struct apei_exec_context *ctx,
			       struct acpi_whea_header *entry)
{
	int rc;

	rc = __apei_exec_read_register(entry, &ctx->var2);

	return rc;
}
/*
 * STORE_VAR1: pass ctx->var1 to __apei_exec_write_register() for
 * @entry and hand back that helper's return code.
 */
static int erst_exec_store_var1(struct apei_exec_context *ctx,
				struct acpi_whea_header *entry)
{
	int rc;

	rc = __apei_exec_write_register(entry, ctx->var1);

	return rc;
}
/* ADD: var1 = var1 + var2.  @entry is not used.  Always returns 0. */
static int erst_exec_add(struct apei_exec_context *ctx,
			 struct acpi_whea_header *entry)
{
	ctx->var1 = ctx->var1 + ctx->var2;

	return 0;
}
/* SUBTRACT: var1 = var1 - var2.  @entry is not used.  Always returns 0. */
static int erst_exec_subtract(struct apei_exec_context *ctx,
			      struct acpi_whea_header *entry)
{
	ctx->var1 = ctx->var1 - ctx->var2;

	return 0;
}
/*
 * ADD_VALUE: read the register for @entry, add ctx->value to what was
 * read and write the sum back.
 *
 * Returns 0 on success or the error reported by the register helpers.
 */
static int erst_exec_add_value(struct apei_exec_context *ctx,
			       struct acpi_whea_header *entry)
{
	u64 val;
	int rc = __apei_exec_read_register(entry, &val);

	if (rc)
		return rc;

	val += ctx->value;

	return __apei_exec_write_register(entry, val);
}
/*
 * SUBTRACT_VALUE: read the register for @entry, subtract ctx->value
 * from what was read and write the difference back.
 *
 * Returns 0 on success or the error reported by the register helpers.
 */
static int erst_exec_subtract_value(struct apei_exec_context *ctx,
				    struct acpi_whea_header *entry)
{
	u64 val;
	int rc = __apei_exec_read_register(entry, &val);

	if (rc)
		return rc;

	val -= ctx->value;

	return __apei_exec_write_register(entry, val);
}
/*
 * STALL: busy-wait for ctx->value microseconds, capped at
 * FIRMWARE_MAX_STALL.  An over-long request is reported, except when
 * running in NMI context.  Always returns 0.
 */
static int erst_exec_stall(struct apei_exec_context *ctx,
			   struct acpi_whea_header *entry)
{
	u64 stall_time = ctx->value;

	if (stall_time > FIRMWARE_MAX_STALL) {
		if (!in_nmi())
			pr_warn(FW_WARN
			"Too long stall time for stall instruction: 0x%llx.\n",
				ctx->value);
		stall_time = FIRMWARE_MAX_STALL;
	}

	udelay(stall_time);

	return 0;
}
/*
 * STALL_WHILE_TRUE: poll the register for @entry until it no longer
 * equals ctx->value.
 *
 * ctx->var1 is the polling interval in microseconds, capped at
 * FIRMWARE_MAX_STALL; the total wait is bounded by FIRMWARE_TIMEOUT.
 *
 * Returns 0 once the register differs, -EIO on timeout, or the error
 * from the register read.
 */
static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
				      struct acpi_whea_header *entry)
{
	u64 budget = FIRMWARE_TIMEOUT;
	u64 stall_time = ctx->var1;
	u64 val;
	int rc;

	if (stall_time > FIRMWARE_MAX_STALL) {
		if (!in_nmi())
			pr_warn(FW_WARN
		"Too long stall time for stall while true instruction: 0x%llx.\n",
				ctx->var1);
		stall_time = FIRMWARE_MAX_STALL;
	}

	do {
		rc = __apei_exec_read_register(entry, &val);
		if (rc)
			return rc;
		if (val != ctx->value)
			return 0;
	} while (!erst_timedout(&budget, stall_time * NSEC_PER_USEC));

	return -EIO;
}
/*
 * SKIP_NEXT_IF_TRUE: read the register for @entry; when it equals
 * ctx->value, advance ctx->ip by two and return APEI_EXEC_SET_IP.
 * Otherwise return 0, or the error from the register read.
 */
static int erst_exec_skip_next_instruction_if_true(
	struct apei_exec_context *ctx,
	struct acpi_whea_header *entry)
{
	u64 val;
	int rc = __apei_exec_read_register(entry, &val);

	if (rc)
		return rc;

	if (val != ctx->value)
		return 0;

	ctx->ip += 2;

	return APEI_EXEC_SET_IP;
}
/*
 * GOTO: make ctx->value the new instruction pointer and return
 * APEI_EXEC_SET_IP.  @entry is not used.
 */
static int erst_exec_goto(struct apei_exec_context *ctx,
			  struct acpi_whea_header *entry)
{
	ctx->ip = ctx->value;

	return APEI_EXEC_SET_IP;
}
/*
 * SET_SRC_ADDRESS_BASE: fill ctx->src_base through
 * __apei_exec_read_register() for @entry; returns the helper's result.
 */
static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
					  struct acpi_whea_header *entry)
{
	int rc;

	rc = __apei_exec_read_register(entry, &ctx->src_base);

	return rc;
}
/*
 * SET_DST_ADDRESS_BASE: fill ctx->dst_base through
 * __apei_exec_read_register() for @entry; returns the helper's result.
 */
static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
					  struct acpi_whea_header *entry)
{
	int rc;

	rc = __apei_exec_read_register(entry, &ctx->dst_base);

	return rc;
}
/*
 * MOVE_DATA: copy ctx->var2 bytes from (ctx->src_base + offset) to
 * (ctx->dst_base + offset), where offset is read from the register
 * for @entry.  Both areas are temporarily mapped with ioremap().
 *
 * Returns 0 on success, -EBUSY when called in interrupt context,
 * -ENOMEM when a mapping fails, or the error from the register read.
 */
static int erst_exec_move_data(struct apei_exec_context *ctx,
			       struct acpi_whea_header *entry)
{
	void *src, *dst;
	u64 offset;
	int rc;

	/* ioremap does not work in interrupt context */
	if (in_interrupt()) {
		pr_warn("MOVE_DATA can not be used in interrupt context.\n");
		return -EBUSY;
	}

	rc = __apei_exec_read_register(entry, &offset);
	if (rc)
		return rc;

	src = ioremap(ctx->src_base + offset, ctx->var2);
	if (!src)
		return -ENOMEM;

	rc = -ENOMEM;
	dst = ioremap(ctx->dst_base + offset, ctx->var2);
	if (dst) {
		/* the two areas may overlap, hence memmove() */
		memmove(dst, src, ctx->var2);
		rc = 0;
	}

	iounmap(src);
	if (dst)
		iounmap(dst);

	return rc;
}
/*
 * ERST instruction dispatch table, indexed by ACPI_ERST_* instruction
 * code.  erst_exec_ctx_init() hands it to apei_exec_ctx_init().
 *
 * .run is the handler for the instruction.  NOTE(review):
 * APEI_EXEC_INS_ACCESS_REGISTER is set exactly on the instructions
 * whose handler uses the register helpers on the entry -- presumably
 * it tells the interpreter that the entry's register must be
 * accessible; confirm against apei-internal.h.
 */
static struct apei_exec_ins_type erst_ins_type[] = {
[ACPI_ERST_READ_REGISTER] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = apei_exec_read_register,
},
[ACPI_ERST_READ_REGISTER_VALUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = apei_exec_read_register_value,
},
[ACPI_ERST_WRITE_REGISTER] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = apei_exec_write_register,
},
[ACPI_ERST_WRITE_REGISTER_VALUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = apei_exec_write_register_value,
},
[ACPI_ERST_NOOP] = {
.flags = 0,
.run = apei_exec_noop,
},
[ACPI_ERST_LOAD_VAR1] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_load_var1,
},
[ACPI_ERST_LOAD_VAR2] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_load_var2,
},
[ACPI_ERST_STORE_VAR1] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_store_var1,
},
[ACPI_ERST_ADD] = {
.flags = 0,
.run = erst_exec_add,
},
[ACPI_ERST_SUBTRACT] = {
.flags = 0,
.run = erst_exec_subtract,
},
[ACPI_ERST_ADD_VALUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_add_value,
},
[ACPI_ERST_SUBTRACT_VALUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_subtract_value,
},
[ACPI_ERST_STALL] = {
.flags = 0,
.run = erst_exec_stall,
},
[ACPI_ERST_STALL_WHILE_TRUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_stall_while_true,
},
[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_skip_next_instruction_if_true,
},
[ACPI_ERST_GOTO] = {
.flags = 0,
.run = erst_exec_goto,
},
[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_set_src_address_base,
},
[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_set_dst_address_base,
},
[ACPI_ERST_MOVE_DATA] = {
.flags = APEI_EXEC_INS_ACCESS_REGISTER,
.run = erst_exec_move_data,
},
};
/*
 * Set up @ctx for running ERST actions: the ERST instruction table
 * plus the instruction entries that follow the header of erst_tab.
 */
static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
{
	struct acpi_whea_header *first_entry = ERST_TAB_ENTRY(erst_tab);

	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
			   first_entry, erst_tab->entries);
}
/*
 * Query firmware for the Error Log Address Range and store its base,
 * size and attributes in @range.
 *
 * Returns 0 on success or the error of the first action that failed;
 * fields belonging to later actions are left untouched in that case.
 */
static int erst_get_erange(struct erst_erange *range)
{
	struct apei_exec_context ctx;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
	if (!rc) {
		range->base = apei_exec_ctx_get_output(&ctx);
		rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
	}
	if (!rc) {
		range->size = apei_exec_ctx_get_output(&ctx);
		rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
	}
	if (!rc)
		range->attr = apei_exec_ctx_get_output(&ctx);

	return rc;
}
/*
 * Run the GET_RECORD_COUNT action.  Returns its output, or the
 * error reported by apei_exec_run().  erst_get_record_count() is the
 * locked wrapper.
 */
static ssize_t __erst_get_record_count(void)
{
	struct apei_exec_context ctx;
	ssize_t count;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
	if (rc)
		count = rc;
	else
		count = apei_exec_ctx_get_output(&ctx);

	return count;
}
/*
 * Return the number of records reported by firmware, -ENODEV when
 * ERST is disabled, or another negative errno on failure.  The query
 * runs under erst_lock with interrupts disabled.
 */
ssize_t erst_get_record_count(void)
{
	unsigned long irqflags;
	ssize_t nr_records;

	if (erst_disable)
		return -ENODEV;

	raw_spin_lock_irqsave(&erst_lock, irqflags);
	nr_records = __erst_get_record_count();
	raw_spin_unlock_irqrestore(&erst_lock, irqflags);

	return nr_records;
}
EXPORT_SYMBOL_GPL(erst_get_record_count);
/* Lower and upper bound for the capacity (in IDs) of the record ID cache */
#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
/*
 * Cache of the record IDs obtained from firmware so far.  It grows on
 * demand in __erst_record_id_cache_add_one() and is compacted by
 * __erst_record_id_cache_compact().
 */
struct erst_record_id_cache {
/* serializes updates of the cache */
struct mutex lock;
/* cached IDs; cleared records are marked APEI_ERST_INVALID_RECORD_ID */
u64 *entries;
/* number of slots of entries[] in use */
int len;
/* number of slots allocated for entries[] */
int size;
/*
 * number of open erst_get_record_id_begin()/end() sections; the
 * cache is not compacted while this is non-zero
 */
int refcount;
};
static struct erst_record_id_cache erst_record_id_cache = {
.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
.refcount = 0,
};
/*
 * Run the GET_RECORD_ID action and store its output in *@record_id.
 * Returns 0 on success; on failure the error from apei_exec_run() is
 * returned and *@record_id is left unchanged.
 */
static int __erst_get_next_record_id(u64 *record_id)
{
	struct apei_exec_context ctx;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
	if (!rc)
		*record_id = apei_exec_ctx_get_output(&ctx);

	return rc;
}
/*
 * Open a record ID enumeration: take a reference on the record ID
 * cache and reset *@pos to 0.
 *
 * Returns 0 on success, -ENODEV when ERST is disabled, or the error
 * from mutex_lock_interruptible(); *@pos is only written on success.
 */
int erst_get_record_id_begin(int *pos)
{
	int rc;

	if (erst_disable)
		return -ENODEV;

	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
	if (!rc) {
		erst_record_id_cache.refcount++;
		mutex_unlock(&erst_record_id_cache.lock);
		*pos = 0;
	}

	return rc;
}
EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
/* erst_record_id_cache.lock must be held by caller */
static int __erst_record_id_cache_add_one(void)
{
u64 id, prev_id, first_id;
int i, rc;
u64 *entries;
unsigned long flags;
id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
retry:
raw_spin_lock_irqsave(&erst_lock, flags);
rc = __erst_get_next_record_id(&id);
raw_spin_unlock_irqrestore(&erst_lock, flags);
if (rc == -ENOENT)
return 0;
if (rc)
return rc;
if (id == APEI_ERST_INVALID_RECORD_ID)
return 0;
/* can not skip current ID, or loop back to first ID */
if (id == prev_id || id == first_id)
return 0;
if (first_id == APEI_ERST_INVALID_RECORD_ID)
first_id = id;
prev_id = id;
entries = erst_record_id_cache.entries;
for (i = 0; i < erst_record_id_cache.len; i++) {
if (entries[i] == id)
break;
}
/* record id already in cache, try next */
if (i < erst_record_id_cache.len)
goto retry;
if (erst_record_id_cache.len >= erst_record_id_cache.size) {
treewide: use kv[mz]alloc* rather than opencoded variants There are many code paths opencoding kvmalloc. Let's use the helper instead. The main difference to kvmalloc is that those users are usually not considering all the aspects of the memory allocator. E.g. allocation requests <= 32kB (with 4kB pages) are basically never failing and invoke OOM killer to satisfy the allocation. This sounds too disruptive for something that has a reasonable fallback - the vmalloc. On the other hand those requests might fallback to vmalloc even when the memory allocator would succeed after several more reclaim/compaction attempts previously. There is no guarantee something like that happens though. This patch converts many of those places to kv[mz]alloc* helpers because they are more conservative. Link: http://lkml.kernel.org/r/20170306103327.2766-2-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> # Xen bits Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Andreas Dilger <andreas.dilger@intel.com> # Lustre Acked-by: Christian Borntraeger <borntraeger@de.ibm.com> # KVM/s390 Acked-by: Dan Williams <dan.j.williams@intel.com> # nvdim Acked-by: David Sterba <dsterba@suse.com> # btrfs Acked-by: Ilya Dryomov <idryomov@gmail.com> # Ceph Acked-by: Tariq Toukan <tariqt@mellanox.com> # mlx4 Acked-by: Leon Romanovsky <leonro@mellanox.com> # mlx5 Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Anton Vorontsov <anton@enomsg.org> Cc: Colin Cross <ccross@android.com> Cc: Tony Luck <tony.luck@intel.com> Cc: "Rafael J. 
Wysocki" <rjw@rjwysocki.net> Cc: Ben Skeggs <bskeggs@redhat.com> Cc: Kent Overstreet <kent.overstreet@gmail.com> Cc: Santosh Raspatur <santosh@chelsio.com> Cc: Hariprasad S <hariprasad@chelsio.com> Cc: Yishai Hadas <yishaih@mellanox.com> Cc: Oleg Drokin <oleg.drokin@intel.com> Cc: "Yan, Zheng" <zyan@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-09 06:57:27 +08:00
int new_size;
u64 *new_entries;
new_size = erst_record_id_cache.size * 2;
new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
ERST_RECORD_ID_CACHE_SIZE_MAX);
if (new_size <= erst_record_id_cache.size) {
if (printk_ratelimit())
pr_warn(FW_WARN "too many record IDs!\n");
return 0;
}
treewide: use kv[mz]alloc* rather than opencoded variants There are many code paths opencoding kvmalloc. Let's use the helper instead. The main difference to kvmalloc is that those users are usually not considering all the aspects of the memory allocator. E.g. allocation requests <= 32kB (with 4kB pages) are basically never failing and invoke OOM killer to satisfy the allocation. This sounds too disruptive for something that has a reasonable fallback - the vmalloc. On the other hand those requests might fallback to vmalloc even when the memory allocator would succeed after several more reclaim/compaction attempts previously. There is no guarantee something like that happens though. This patch converts many of those places to kv[mz]alloc* helpers because they are more conservative. Link: http://lkml.kernel.org/r/20170306103327.2766-2-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> # Xen bits Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Andreas Dilger <andreas.dilger@intel.com> # Lustre Acked-by: Christian Borntraeger <borntraeger@de.ibm.com> # KVM/s390 Acked-by: Dan Williams <dan.j.williams@intel.com> # nvdim Acked-by: David Sterba <dsterba@suse.com> # btrfs Acked-by: Ilya Dryomov <idryomov@gmail.com> # Ceph Acked-by: Tariq Toukan <tariqt@mellanox.com> # mlx4 Acked-by: Leon Romanovsky <leonro@mellanox.com> # mlx5 Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Anton Vorontsov <anton@enomsg.org> Cc: Colin Cross <ccross@android.com> Cc: Tony Luck <tony.luck@intel.com> Cc: "Rafael J. 
Wysocki" <rjw@rjwysocki.net> Cc: Ben Skeggs <bskeggs@redhat.com> Cc: Kent Overstreet <kent.overstreet@gmail.com> Cc: Santosh Raspatur <santosh@chelsio.com> Cc: Hariprasad S <hariprasad@chelsio.com> Cc: Yishai Hadas <yishaih@mellanox.com> Cc: Oleg Drokin <oleg.drokin@intel.com> Cc: "Yan, Zheng" <zyan@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-09 06:57:27 +08:00
new_entries = kvmalloc(new_size * sizeof(entries[0]), GFP_KERNEL);
if (!new_entries)
return -ENOMEM;
memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0]));
kvfree(entries);
erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size;
}
entries[i] = id;
erst_record_id_cache.len++;
return 1;
}
/*
 * Fetch the ID of the next existing error record in the persistent
 * store.  *@pos is the enumeration cursor set up by
 * erst_get_record_id_begin().
 *
 * When no further record exists, 0 is returned with *@record_id set
 * to APEI_ERST_INVALID_RECORD_ID and *@pos set to -1.  A negative
 * errno is returned on failure (-ENODEV when ERST is disabled).
 */
int erst_get_record_id_next(int *pos, u64 *record_id)
{
	u64 *ids;
	int rc = 0;

	if (erst_disable)
		return -ENODEV;

	/* must be enclosed by erst_get_record_id_begin/end */
	BUG_ON(!erst_record_id_cache.refcount);
	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);

	mutex_lock(&erst_record_id_cache.lock);

	/* step over cache slots that were invalidated by erst_clear() */
	ids = erst_record_id_cache.entries;
	while (*pos < erst_record_id_cache.len &&
	       ids[*pos] == APEI_ERST_INVALID_RECORD_ID)
		(*pos)++;

	if (*pos < erst_record_id_cache.len) {
		/* found next record id in cache */
		*record_id = ids[*pos];
		(*pos)++;
	} else {
		/* Try to add one more record ID to cache */
		rc = __erst_record_id_cache_add_one();
		if (rc == 1) {
			/* successfully added one new ID (array may have moved) */
			*record_id = erst_record_id_cache.entries[*pos];
			(*pos)++;
			rc = 0;
		} else if (rc >= 0) {
			*pos = -1;
			*record_id = APEI_ERST_INVALID_RECORD_ID;
		}
	}

	mutex_unlock(&erst_record_id_cache.lock);

	return rc;
}
EXPORT_SYMBOL_GPL(erst_get_record_id_next);
/*
 * Squeeze invalidated slots (APEI_ERST_INVALID_RECORD_ID) out of the
 * record ID cache, keeping the order of the remaining IDs.  Nothing is
 * done while an enumeration holds a reference on the cache.
 *
 * erst_record_id_cache.lock must be held by caller.
 */
static void __erst_record_id_cache_compact(void)
{
	int rd, wr = 0;
	u64 *ids;

	if (erst_record_id_cache.refcount)
		return;

	ids = erst_record_id_cache.entries;
	for (rd = 0; rd < erst_record_id_cache.len; rd++) {
		if (ids[rd] != APEI_ERST_INVALID_RECORD_ID) {
			if (wr != rd)
				ids[wr] = ids[rd];
			wr++;
		}
	}
	erst_record_id_cache.len = wr;
}
/*
 * Close a record ID enumeration: drop the reference taken by
 * erst_get_record_id_begin() and try to compact the cache (a no-op
 * while other references remain).
 */
void erst_get_record_id_end(void)
{
	/*
	 * Callers learn about erst_disable != 0 from the return value of
	 * erst_get_record_id_begin/next and must not get here in that
	 * case.
	 */
	BUG_ON(erst_disable);

	mutex_lock(&erst_record_id_cache.lock);

	erst_record_id_cache.refcount--;
	BUG_ON(erst_record_id_cache.refcount < 0);
	__erst_record_id_cache_compact();

	mutex_unlock(&erst_record_id_cache.lock);
}
EXPORT_SYMBOL_GPL(erst_get_record_id_end);
/*
 * Run the firmware write sequence: BEGIN_WRITE, SET_RECORD_OFFSET
 * (@offset), EXECUTE_OPERATION, poll CHECK_BUSY_STATUS, read
 * GET_COMMAND_STATUS, END.
 *
 * Returns the command status translated by erst_errno(), -EIO when
 * firmware stays busy longer than FIRMWARE_TIMEOUT, or the error of
 * the action that failed.
 */
static int __erst_write_to_storage(u64 offset)
{
	struct apei_exec_context ctx;
	u64 budget = FIRMWARE_TIMEOUT;
	u64 busy, status;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
	if (rc)
		return rc;

	apei_exec_ctx_set_input(&ctx, offset);
	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
	if (rc)
		return rc;

	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
	if (rc)
		return rc;

	/* wait until firmware reports the operation as finished */
	do {
		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
		if (rc)
			return rc;
		busy = apei_exec_ctx_get_output(&ctx);
	} while (busy && !erst_timedout(&budget, SPIN_UNIT));
	if (busy)
		return -EIO;

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
	if (rc)
		return rc;
	status = apei_exec_ctx_get_output(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
	if (rc)
		return rc;

	return erst_errno(status);
}
/*
 * Run the firmware read sequence for @record_id: BEGIN_READ,
 * SET_RECORD_OFFSET (@offset), SET_RECORD_ID, EXECUTE_OPERATION, poll
 * CHECK_BUSY_STATUS, read GET_COMMAND_STATUS, END.
 *
 * Returns the command status translated by erst_errno(), -EIO when
 * firmware stays busy longer than FIRMWARE_TIMEOUT, or the error of
 * the action that failed.
 */
static int __erst_read_from_storage(u64 record_id, u64 offset)
{
	struct apei_exec_context ctx;
	u64 budget = FIRMWARE_TIMEOUT;
	u64 busy, status;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
	if (rc)
		return rc;

	apei_exec_ctx_set_input(&ctx, offset);
	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
	if (rc)
		return rc;

	apei_exec_ctx_set_input(&ctx, record_id);
	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
	if (rc)
		return rc;

	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
	if (rc)
		return rc;

	/* wait until firmware reports the operation as finished */
	do {
		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
		if (rc)
			return rc;
		busy = apei_exec_ctx_get_output(&ctx);
	} while (busy && !erst_timedout(&budget, SPIN_UNIT));
	if (busy)
		return -EIO;

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
	if (rc)
		return rc;
	status = apei_exec_ctx_get_output(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
	if (rc)
		return rc;

	return erst_errno(status);
}
/*
 * Run the firmware clear sequence for @record_id: BEGIN_CLEAR,
 * SET_RECORD_ID, EXECUTE_OPERATION, poll CHECK_BUSY_STATUS, read
 * GET_COMMAND_STATUS, END.
 *
 * Returns the command status translated by erst_errno(), -EIO when
 * firmware stays busy longer than FIRMWARE_TIMEOUT, or the error of
 * the action that failed.
 */
static int __erst_clear_from_storage(u64 record_id)
{
	struct apei_exec_context ctx;
	u64 budget = FIRMWARE_TIMEOUT;
	u64 busy, status;
	int rc;

	erst_exec_ctx_init(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
	if (rc)
		return rc;

	apei_exec_ctx_set_input(&ctx, record_id);
	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
	if (rc)
		return rc;

	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
	if (rc)
		return rc;

	/* wait until firmware reports the operation as finished */
	do {
		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
		if (rc)
			return rc;
		busy = apei_exec_ctx_get_output(&ctx);
	} while (busy && !erst_timedout(&budget, SPIN_UNIT));
	if (busy)
		return -EIO;

	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
	if (rc)
		return rc;
	status = apei_exec_ctx_get_output(&ctx);

	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
	if (rc)
		return rc;

	return erst_errno(status);
}
/*
 * The NVRAM flavour of the ERST Error Log Address Range is not
 * supported yet; emit a rate-limited warning saying so.
 */
static void pr_unimpl_nvram(void)
{
	if (!printk_ratelimit())
		return;

	pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
/*
 * Writing to an NVRAM range is not implemented: always -ENOSYS.
 * Deliberately silent, because printk is not safe for NMI.
 */
static int __erst_write_to_nvram(const struct cper_record_header *record)
{
	return -ENOSYS;
}
/*
 * Reading from an NVRAM range is not implemented: warn (rate-limited)
 * and return -ENOSYS.  *@offset is not written.
 */
static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
{
	int rc = -ENOSYS;

	pr_unimpl_nvram();

	return rc;
}
/*
 * Clearing a record in an NVRAM range is not implemented: warn
 * (rate-limited) and return -ENOSYS.
 */
static int __erst_clear_from_nvram(u64 record_id)
{
	int rc = -ENOSYS;

	pr_unimpl_nvram();

	return rc;
}
int erst_write(const struct cper_record_header *record)
{
int rc;
unsigned long flags;
struct cper_record_header *rcd_erange;
if (erst_disable)
return -ENODEV;
if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
return -EINVAL;
if (erst_erange.attr & ERST_RANGE_NVRAM) {
if (!raw_spin_trylock_irqsave(&erst_lock, flags))
return -EBUSY;
rc = __erst_write_to_nvram(record);
raw_spin_unlock_irqrestore(&erst_lock, flags);
return rc;
}
if (record->record_length > erst_erange.size)
return -EINVAL;
if (!raw_spin_trylock_irqsave(&erst_lock, flags))
return -EBUSY;
memcpy(erst_erange.vaddr, record, record->record_length);
rcd_erange = erst_erange.vaddr;
/* signature for serialization system */
memcpy(&rcd_erange->persistence_information, "ER", 2);
rc = __erst_write_to_storage(0);
raw_spin_unlock_irqrestore(&erst_lock, flags);
return rc;
}
EXPORT_SYMBOL_GPL(erst_write);
/*
 * Have firmware place record @record_id into the Error Log Address
 * Range and report in *@offset where it starts inside the range.
 *
 * For a non-NVRAM range the record is read to offset 0.  Returns 0 on
 * success or a negative errno; *@offset is only written on success.
 */
static int __erst_read_to_erange(u64 record_id, u64 *offset)
{
	int rc;

	if (erst_erange.attr & ERST_RANGE_NVRAM)
		return __erst_read_to_erange_from_nvram(record_id, offset);

	rc = __erst_read_from_storage(record_id, 0);
	if (!rc)
		*offset = 0;

	return rc;
}
/*
 * Read record @record_id into the Error Log Address Range and, when
 * it fits into @buflen bytes, copy it to @record.
 *
 * erst_read() calls this with erst_lock held.
 *
 * Returns the record length taken from the record header (the copy is
 * only done when that length is <= @buflen), the error from
 * __erst_read_to_erange(), or -EIO when the header claims a length
 * that does not fit into the Error Log Address Range.
 */
static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
			   size_t buflen)
{
	int rc;
	u64 offset, len = 0;
	struct cper_record_header *rcd_tmp;

	rc = __erst_read_to_erange(record_id, &offset);
	if (rc)
		return rc;

	rcd_tmp = erst_erange.vaddr + offset;
	len = rcd_tmp->record_length;

	/*
	 * record_length is data handed back by firmware.  erst_write()
	 * never stores a record larger than erst_erange.size, so a bigger
	 * value is bogus; do not let it drive a copy past the end of the
	 * range.
	 */
	if (offset > erst_erange.size || len > erst_erange.size - offset)
		return -EIO;

	if (len <= buflen)
		memcpy(record, rcd_tmp, len);

	return len;
}
/*
 * Read record @record_id into @record, a buffer of @buflen bytes.
 *
 * Return value:
 *   < 0       something went wrong (-ENODEV when ERST is disabled),
 *   > buflen  the buffer is too small; nothing was copied,
 *   otherwise the record was copied and its length is returned.
 */
ssize_t erst_read(u64 record_id, struct cper_record_header *record,
		  size_t buflen)
{
	unsigned long irqflags;
	ssize_t rcd_len;

	if (erst_disable)
		return -ENODEV;

	raw_spin_lock_irqsave(&erst_lock, irqflags);
	rcd_len = __erst_read(record_id, record, buflen);
	raw_spin_unlock_irqrestore(&erst_lock, irqflags);

	return rcd_len;
}
EXPORT_SYMBOL_GPL(erst_read);
/*
 * Remove record @record_id from the persistent store and, on success,
 * invalidate its slot(s) in the record ID cache.
 *
 * Returns 0 on success, -ENODEV when ERST is disabled, the error from
 * mutex_lock_interruptible(), or the error of the clear operation.
 */
int erst_clear(u64 record_id)
{
	unsigned long irqflags;
	u64 *ids;
	int rc, idx;

	if (erst_disable)
		return -ENODEV;

	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
	if (rc)
		return rc;

	raw_spin_lock_irqsave(&erst_lock, irqflags);
	if (!(erst_erange.attr & ERST_RANGE_NVRAM))
		rc = __erst_clear_from_storage(record_id);
	else
		rc = __erst_clear_from_nvram(record_id);
	raw_spin_unlock_irqrestore(&erst_lock, irqflags);

	if (!rc) {
		ids = erst_record_id_cache.entries;
		for (idx = 0; idx < erst_record_id_cache.len; idx++) {
			if (ids[idx] == record_id)
				ids[idx] = APEI_ERST_INVALID_RECORD_ID;
		}
		/* a no-op while an enumeration is in progress */
		__erst_record_id_cache_compact();
	}

	mutex_unlock(&erst_record_id_cache.lock);

	return rc;
}
EXPORT_SYMBOL_GPL(erst_clear);
/*
 * Handler for the "erst_disable" kernel command line option: switch
 * ERST support off.
 *
 * Returns 1 to tell the boot code that the option was consumed.  A
 * __setup() handler that returns 0 marks the option as unhandled, and
 * the kernel then passes it on to init as if it were unknown.
 */
static int __init setup_erst_disable(char *str)
{
	erst_disable = 1;
	return 1;
}
__setup("erst_disable", setup_erst_disable);
static int erst_check_table(struct acpi_table_erst *erst_tab)
{
if ((erst_tab->header_length !=
(sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
&& (erst_tab->header_length != sizeof(struct acpi_table_erst)))
return -EINVAL;
if (erst_tab->header.length < sizeof(struct acpi_table_erst))
return -EINVAL;
if (erst_tab->entries !=
(erst_tab->header.length - sizeof(struct acpi_table_erst)) /
sizeof(struct acpi_erst_entry))
return -EINVAL;
return 0;
}
/* pstore callbacks, defined further down; declared here for erst_info */
static int erst_open_pstore(struct pstore_info *psi);
static int erst_close_pstore(struct pstore_info *psi);
static ssize_t erst_reader(struct pstore_record *record);
static int erst_writer(struct pstore_record *record);
static int erst_clearer(struct pstore_record *record);
/*
 * Description of ERST as a pstore backend named "erst", advertising
 * PSTORE_FLAGS_DMESG.  NOTE(review): where it is registered with
 * pstore is outside the code visible here.
 */
static struct pstore_info erst_info = {
.owner = THIS_MODULE,
.name = "erst",
.flags = PSTORE_FLAGS_DMESG,
.open = erst_open_pstore,
.close = erst_close_pstore,
.read = erst_reader,
.write = erst_writer,
.erase = erst_clearer
};
/*
 * GUIDs for the CPER records handled by the pstore callbacks.
 * NOTE(review): going by the names, CPER_CREATOR_PSTORE identifies
 * pstore as the creator of a record and the CPER_SECTION_TYPE_* values
 * tag the payload as dmesg text, compressed dmesg text or MCE data;
 * their users are outside the code visible here -- confirm.
 */
#define CPER_CREATOR_PSTORE \
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_DMESG \
UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \
0x94, 0x19, 0xeb, 0x12)
#define CPER_SECTION_TYPE_DMESG_Z \
UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \
0x34, 0xdd, 0xfa, 0xc6)
#define CPER_SECTION_TYPE_MCE \
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc)
/*
 * Layout of a pstore record as kept in ERST: a CPER record header,
 * one section descriptor, then the payload.  Packed, so the three
 * parts are contiguous with no padding.
 */
struct cper_pstore_record {
struct cper_record_header hdr;
struct cper_section_descriptor sec_hdr;
/* payload; its length is not part of this structure */
char data[];
} __packed;
/* Enumeration cursor of the pstore reader, reset by erst_open_pstore() */
static int reader_pos;

/*
 * pstore ->open callback: start a record ID enumeration with
 * reader_pos as its cursor.  Returns -ENODEV when ERST is disabled,
 * otherwise the result of erst_get_record_id_begin().
 */
static int erst_open_pstore(struct pstore_info *psi)
{
	if (erst_disable)
		return -ENODEV;

	return erst_get_record_id_begin(&reader_pos);
}
/*
 * pstore ->close callback: finish the record ID enumeration started
 * by erst_open_pstore().  Always returns 0.
 */
static int erst_close_pstore(struct pstore_info *psi)
{
	erst_get_record_id_end();

	return 0;
}
static ssize_t erst_reader(struct pstore_record *record)
{
int rc;
ssize_t len = 0;
u64 record_id;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
struct cper_pstore_record *rcd;
size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
if (erst_disable)
return -ENODEV;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
rcd = kmalloc(rcd_len, GFP_KERNEL);
if (!rcd) {
rc = -ENOMEM;
goto out;
}
skip:
rc = erst_get_record_id_next(&reader_pos, &record_id);
if (rc)
goto out;
/* no more record */
if (record_id == APEI_ERST_INVALID_RECORD_ID) {
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
rc = -EINVAL;
goto out;
}
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
len = erst_read(record_id, &rcd->hdr, rcd_len);
/* The record may be cleared by others, try read next record */
if (len == -ENOENT)
goto skip;
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
else if (len < sizeof(*rcd)) {
rc = -EIO;
goto out;
}
if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
goto skip;
record->buf = kmalloc(len, GFP_KERNEL);
if (record->buf == NULL) {
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
rc = -ENOMEM;
goto out;
}
memcpy(record->buf, rcd->data, len - sizeof(*rcd));
record->id = record_id;
record->compressed = false;
record->ecc_notice_size = 0;
if (uuid_le_cmp(rcd->sec_hdr.section_type,
CPER_SECTION_TYPE_DMESG_Z) == 0) {
record->type = PSTORE_TYPE_DMESG;
record->compressed = true;
} else if (uuid_le_cmp(rcd->sec_hdr.section_type,
CPER_SECTION_TYPE_DMESG) == 0)
record->type = PSTORE_TYPE_DMESG;
else if (uuid_le_cmp(rcd->sec_hdr.section_type,
CPER_SECTION_TYPE_MCE) == 0)
record->type = PSTORE_TYPE_MCE;
else
record->type = PSTORE_TYPE_UNKNOWN;
if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
record->time.tv_sec = rcd->hdr.timestamp;
else
record->time.tv_sec = 0;
record->time.tv_nsec = 0;
out:
pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? 
alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-11-18 04:58:07 +08:00
kfree(rcd);
return (rc < 0) ? rc : (len - sizeof(*rcd));
}
/*
 * Serialize one pstore record into the ERST persistent store.
 *
 * The CPER framing (record header plus a single section descriptor) is
 * built in the sizeof(struct cper_pstore_record) bytes located directly
 * in front of erst_info.buf, and the framed record is then handed to
 * erst_write().  This function never copies the payload itself;
 * presumably the caller has already placed record->size bytes in
 * erst_info.buf (confirm against the pstore core).
 *
 * Returns -EINVAL, without calling erst_write(), when record->type is
 * neither PSTORE_TYPE_DMESG nor PSTORE_TYPE_MCE; otherwise returns the
 * result of erst_write().  record->id is set to the freshly generated
 * CPER record id after erst_write() has been called, whatever that call
 * returned.
 */
static int erst_writer(struct pstore_record *record)
{
	struct cper_pstore_record *cper;
	int rc;

	/* The CPER framing lives immediately in front of the pstore buffer. */
	cper = (struct cper_pstore_record *)(erst_info.buf - sizeof(*cper));
	memset(cper, 0, sizeof(*cper));

	/* Record header: signature and fixed identification fields. */
	memcpy(cper->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
	cper->hdr.signature_end = CPER_SIG_END;
	cper->hdr.revision = CPER_RECORD_REV;
	cper->hdr.creator_id = CPER_CREATOR_PSTORE;
	cper->hdr.notification_type = CPER_NOTIFY_MCE;
	cper->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
	cper->hdr.error_severity = CPER_SEV_FATAL;

	/*
	 * Only the timestamp is flagged as valid; platform_id and
	 * partition_id are left invalid.
	 */
	cper->hdr.validation_bits = CPER_VALID_TIMESTAMP;
	cper->hdr.timestamp = get_seconds();
	cper->hdr.record_id = cper_next_record_id();

	/* Exactly one section, starting right behind the framing. */
	cper->hdr.section_count = 1;
	cper->hdr.record_length = sizeof(*cper) + record->size;
	cper->sec_hdr.section_offset = sizeof(*cper);
	cper->sec_hdr.section_length = record->size;

	cper->sec_hdr.revision = CPER_SEC_REV;
	cper->sec_hdr.flags = CPER_SEC_PRIMARY;
	/* No validation bits: fru_id and fru_text are both invalid. */
	cper->sec_hdr.validation_bits = 0;

	/* Map the pstore record type onto the matching CPER section type. */
	if (record->type == PSTORE_TYPE_DMESG) {
		if (record->compressed)
			cper->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
		else
			cper->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
	} else if (record->type == PSTORE_TYPE_MCE) {
		cper->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
	} else {
		return -EINVAL;
	}
	cper->sec_hdr.section_severity = CPER_SEV_FATAL;

	rc = erst_write(&cper->hdr);
	/* Report the generated id back even when the write failed. */
	record->id = cper->hdr.record_id;

	return rc;
}
/*
 * Remove the ERST entry whose identifier is stored in record->id by
 * handing it to erst_clear(), and pass that call's result back unchanged.
 */
static int erst_clearer(struct pstore_record *record)
{
	int rc;

	rc = erst_clear(record->id);

	return rc;
}
/*
 * erst_init - locate the ERST ACPI table, claim and map the resources it
 * describes, and try to register ERST with pstore.
 *
 * Returns 0 when ERST was set up, and also when setup was skipped because
 * ACPI is disabled, ERST is disabled, or no ERST table exists (rc is still
 * 0 on those paths).  Returns a negative errno for any other failure.
 * Every path through the "err" label sets erst_disable to 1.
 *
 * Failing to allocate the pstore buffer or to register with pstore is not
 * treated as an init failure: the function still returns 0 in both cases.
 */
static int __init erst_init(void)
{
int rc = 0;
acpi_status status;
struct apei_exec_context ctx;
struct apei_resources erst_resources;
struct resource *r;
char *buf;
/* Nothing to do without ACPI; rc is still 0 here. */
if (acpi_disabled)
goto err;
if (erst_disable) {
pr_info(
"Error Record Serialization Table (ERST) support is disabled.\n");
goto err;
}
status = acpi_get_table(ACPI_SIG_ERST, 0,
(struct acpi_table_header **)&erst_tab);
/* A missing table is not an error: leave with rc == 0. */
if (status == AE_NOT_FOUND)
goto err;
else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
pr_err("Failed to get table, %s\n", msg);
rc = -EINVAL;
goto err;
}
rc = erst_check_table(erst_tab);
if (rc) {
pr_err(FW_BUG "ERST table is invalid.\n");
goto err;
}
/*
 * Collect the resources used by the ERST execution context, request
 * them, then pre-map the GARs.  Each failure jumps to the label that
 * undoes only the steps already completed.
 */
apei_resources_init(&erst_resources);
erst_exec_ctx_init(&ctx);
rc = apei_exec_collect_resources(&ctx, &erst_resources);
if (rc)
goto err_fini;
rc = apei_resources_request(&erst_resources, "APEI ERST");
if (rc)
goto err_fini;
rc = apei_exec_pre_map_gars(&ctx);
if (rc)
goto err_release;
/* Fetch the Error Log Address Range into erst_erange. */
rc = erst_get_erange(&erst_erange);
if (rc) {
if (rc == -ENODEV)
pr_info(
"The corresponding hardware device or firmware implementation "
"is not available.\n");
else
pr_err("Failed to get Error Log Address Range.\n");
goto err_unmap_reg;
}
r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
if (!r) {
pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
(unsigned long long)erst_erange.base,
(unsigned long long)erst_erange.base + erst_erange.size - 1);
rc = -EIO;
goto err_unmap_reg;
}
/* Preset the error code reported if the mapping below fails. */
rc = -ENOMEM;
erst_erange.vaddr = ioremap_cache(erst_erange.base,
erst_erange.size);
if (!erst_erange.vaddr)
goto err_release_erange;
pr_info(
"Error Record Serialization Table (ERST) support is initialized.\n");
/*
 * From here on failures are only logged: ERST is already reported as
 * initialized and the function returns 0 regardless of the outcome of
 * the pstore setup below.
 */
buf = kmalloc(erst_erange.size, GFP_KERNEL);
spin_lock_init(&erst_info.buf_lock);
if (buf) {
/*
 * The first sizeof(struct cper_pstore_record) bytes of the allocation
 * are kept in front of erst_info.buf; erst_writer() builds its CPER
 * headers at erst_info.buf - sizeof(struct cper_pstore_record).
 */
erst_info.buf = buf + sizeof(struct cper_pstore_record);
erst_info.bufsize = erst_erange.size -
sizeof(struct cper_pstore_record);
rc = pstore_register(&erst_info);
if (rc) {
/* -EPERM is the one registration failure that is not logged. */
if (rc != -EPERM)
pr_info(
"Could not register with persistent store.\n");
/* Registration failed: drop the buffer and clear erst_info. */
erst_info.buf = NULL;
erst_info.bufsize = 0;
kfree(buf);
}
} else
pr_err(
"Failed to allocate %lld bytes for persistent store error log.\n",
erst_erange.size);
/* Cleanup ERST Resources */
apei_resources_fini(&erst_resources);
return 0;
/*
 * Error unwinding, in reverse order of acquisition.  err_release_erange
 * is only reached when ioremap_cache() failed, so there is no mapping
 * to undo at that point.
 */
err_release_erange:
release_mem_region(erst_erange.base, erst_erange.size);
err_unmap_reg:
apei_exec_post_unmap_gars(&ctx);
err_release:
apei_resources_release(&erst_resources);
err_fini:
apei_resources_fini(&erst_resources);
err:
/* Mark ERST as disabled on every early-exit path, including rc == 0. */
erst_disable = 1;
return rc;
}
/* Register erst_init() through the device_initcall() mechanism. */
device_initcall(erst_init);