OpenCloudOS-Kernel/drivers/firmware/efi/efivars.c

755 lines
19 KiB
C
Raw Normal View History

/*
* Originally from efivars.c,
*
* Copyright (C) 2001,2003,2004 Dell <Matt_Domsch@dell.com>
* Copyright (C) 2004 Intel Corporation <matthew.e.tolentino@intel.com>
*
* This code takes all variables accessible from EFI runtime and
* exports them via sysfs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Changelog:
*
* 17 May 2004 - Matt Domsch <Matt_Domsch@dell.com>
* remove check for efi_enabled in exit
* add MODULE_VERSION
*
* 26 Apr 2004 - Matt Domsch <Matt_Domsch@dell.com>
* minor bug fixes
*
* 21 Apr 2004 - Matt Tolentino <matthew.e.tolentino@intel.com)
* converted driver to export variable information via sysfs
* and moved to drivers/firmware directory
* bumped revision number to v0.07 to reflect conversion & move
*
* 10 Dec 2002 - Matt Domsch <Matt_Domsch@dell.com>
* fix locking per Peter Chubb's findings
*
* 25 Mar 2002 - Matt Domsch <Matt_Domsch@dell.com>
* move uuid_unparse() to include/asm-ia64/efi.h:efi_guid_to_str()
*
* 12 Feb 2002 - Matt Domsch <Matt_Domsch@dell.com>
* use list_for_each_safe when deleting vars.
* remove ifdef CONFIG_SMP around include <linux/smp.h>
* v0.04 release to linux-ia64@linuxia64.org
*
* 20 April 2001 - Matt Domsch <Matt_Domsch@dell.com>
* Moved vars from /proc/efi to /proc/efi/vars, and made
* efi.c own the /proc/efi directory.
* v0.03 release to linux-ia64@linuxia64.org
*
* 26 March 2001 - Matt Domsch <Matt_Domsch@dell.com>
* At the request of Stephane, moved ownership of /proc/efi
* to efi.c, and now efivars lives under /proc/efi/vars.
*
* 12 March 2001 - Matt Domsch <Matt_Domsch@dell.com>
* Feedback received from Stephane Eranian incorporated.
* efivar_write() checks copy_from_user() return value.
* efivar_read/write() returns proper errno.
* v0.02 release to linux-ia64@linuxia64.org
*
* 26 February 2001 - Matt Domsch <Matt_Domsch@dell.com>
* v0.01 release to linux-ia64@linuxia64.org
*/
#include <linux/efi.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
#include <linux/compat.h>
#define EFIVARS_VERSION "0.08"
#define EFIVARS_DATE "2004-May-17"
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@Dell.com>");
MODULE_DESCRIPTION("sysfs interface to EFI Variables");
MODULE_LICENSE("GPL");
MODULE_VERSION(EFIVARS_VERSION);
MODULE_ALIAS("platform:efivars");
LIST_HEAD(efivar_sysfs_list);
EXPORT_SYMBOL_GPL(efivar_sysfs_list);
static struct kset *efivars_kset;
static struct bin_attribute *efivars_new_var;
static struct bin_attribute *efivars_del_var;
struct compat_efi_variable {
efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
efi_guid_t VendorGuid;
__u32 DataSize;
__u8 Data[1024];
__u32 Status;
__u32 Attributes;
} __packed;
struct efivar_attribute {
struct attribute attr;
ssize_t (*show) (struct efivar_entry *entry, char *buf);
ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
};
#define EFIVAR_ATTR(_name, _mode, _show, _store) \
struct efivar_attribute efivar_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode}, \
.show = _show, \
.store = _store, \
};
#define to_efivar_attr(_attr) container_of(_attr, struct efivar_attribute, attr)
#define to_efivar_entry(obj) container_of(obj, struct efivar_entry, kobj)
/*
* Prototype for sysfs creation function
*/
static int
efivar_create_sysfs_entry(struct efivar_entry *new_var);
static ssize_t
efivar_guid_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
char *str = buf;
if (!entry || !buf)
return 0;
efi_guid_to_str(&var->VendorGuid, str);
str += strlen(str);
str += sprintf(str, "\n");
return str - buf;
}
static ssize_t
efivar_attr_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
char *str = buf;
if (!entry || !buf)
return -EINVAL;
var->DataSize = 1024;
if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
return -EIO;
if (var->Attributes & EFI_VARIABLE_NON_VOLATILE)
str += sprintf(str, "EFI_VARIABLE_NON_VOLATILE\n");
if (var->Attributes & EFI_VARIABLE_BOOTSERVICE_ACCESS)
str += sprintf(str, "EFI_VARIABLE_BOOTSERVICE_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_RUNTIME_ACCESS)
str += sprintf(str, "EFI_VARIABLE_RUNTIME_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_HARDWARE_ERROR_RECORD)
str += sprintf(str, "EFI_VARIABLE_HARDWARE_ERROR_RECORD\n");
if (var->Attributes & EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS)
str += sprintf(str,
"EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS\n");
if (var->Attributes &
EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS)
str += sprintf(str,
"EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_APPEND_WRITE)
str += sprintf(str, "EFI_VARIABLE_APPEND_WRITE\n");
return str - buf;
}
static ssize_t
efivar_size_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
char *str = buf;
if (!entry || !buf)
return -EINVAL;
var->DataSize = 1024;
if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
return -EIO;
str += sprintf(str, "0x%lx\n", var->DataSize);
return str - buf;
}
static ssize_t
efivar_data_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
if (!entry || !buf)
return -EINVAL;
var->DataSize = 1024;
if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
return -EIO;
memcpy(buf, var->Data, var->DataSize);
return var->DataSize;
}
static inline int
sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
unsigned long size, u32 attributes, u8 *data)
{
/*
* If only updating the variable data, then the name
* and guid should remain the same
*/
if (memcmp(name, var->VariableName, sizeof(var->VariableName)) ||
efi_guidcmp(vendor, var->VendorGuid)) {
printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n");
return -EINVAL;
}
if ((size <= 0) || (attributes == 0)){
printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n");
return -EINVAL;
}
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
efivar_validate(vendor, name, data, size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
return 0;
}
static void
copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
{
memcpy(dst->VariableName, src->VariableName, EFI_VAR_NAME_LEN);
memcpy(dst->Data, src->Data, sizeof(src->Data));
dst->VendorGuid = src->VendorGuid;
dst->DataSize = src->DataSize;
dst->Attributes = src->Attributes;
}
/*
* We allow each variable to be edited via rewriting the
* entire efi variable structure.
*/
static ssize_t
efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
{
struct efi_variable *new_var, *var = &entry->var;
efi_char16_t *name;
unsigned long size;
efi_guid_t vendor;
u32 attributes;
u8 *data;
int err;
if (in_compat_syscall()) {
struct compat_efi_variable *compat;
if (count != sizeof(*compat))
return -EINVAL;
compat = (struct compat_efi_variable *)buf;
attributes = compat->Attributes;
vendor = compat->VendorGuid;
name = compat->VariableName;
size = compat->DataSize;
data = compat->Data;
err = sanity_check(var, name, vendor, size, attributes, data);
if (err)
return err;
copy_out_compat(&entry->var, compat);
} else {
if (count != sizeof(struct efi_variable))
return -EINVAL;
new_var = (struct efi_variable *)buf;
attributes = new_var->Attributes;
vendor = new_var->VendorGuid;
name = new_var->VariableName;
size = new_var->DataSize;
data = new_var->Data;
err = sanity_check(var, name, vendor, size, attributes, data);
if (err)
return err;
memcpy(&entry->var, new_var, count);
}
err = efivar_entry_set(entry, attributes, size, data, NULL);
if (err) {
printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err);
return -EIO;
}
return count;
}
static ssize_t
efivar_show_raw(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
struct compat_efi_variable *compat;
size_t size;
if (!entry || !buf)
return 0;
var->DataSize = 1024;
if (efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data))
return -EIO;
if (in_compat_syscall()) {
compat = (struct compat_efi_variable *)buf;
size = sizeof(*compat);
memcpy(compat->VariableName, var->VariableName,
EFI_VAR_NAME_LEN);
memcpy(compat->Data, var->Data, sizeof(compat->Data));
compat->VendorGuid = var->VendorGuid;
compat->DataSize = var->DataSize;
compat->Attributes = var->Attributes;
} else {
size = sizeof(*var);
memcpy(buf, var, size);
}
return size;
}
/*
* Generic read/write functions that call the specific functions of
* the attributes...
*/
static ssize_t efivar_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct efivar_entry *var = to_efivar_entry(kobj);
struct efivar_attribute *efivar_attr = to_efivar_attr(attr);
ssize_t ret = -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (efivar_attr->show) {
ret = efivar_attr->show(var, buf);
}
return ret;
}
static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
struct efivar_entry *var = to_efivar_entry(kobj);
struct efivar_attribute *efivar_attr = to_efivar_attr(attr);
ssize_t ret = -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (efivar_attr->store)
ret = efivar_attr->store(var, buf, count);
return ret;
}
static const struct sysfs_ops efivar_attr_ops = {
.show = efivar_attr_show,
.store = efivar_attr_store,
};
static void efivar_release(struct kobject *kobj)
{
struct efivar_entry *var = to_efivar_entry(kobj);
kfree(var);
}
static EFIVAR_ATTR(guid, 0400, efivar_guid_read, NULL);
static EFIVAR_ATTR(attributes, 0400, efivar_attr_read, NULL);
static EFIVAR_ATTR(size, 0400, efivar_size_read, NULL);
static EFIVAR_ATTR(data, 0400, efivar_data_read, NULL);
static EFIVAR_ATTR(raw_var, 0600, efivar_show_raw, efivar_store_raw);
static struct attribute *def_attrs[] = {
&efivar_attr_guid.attr,
&efivar_attr_size.attr,
&efivar_attr_attributes.attr,
&efivar_attr_data.attr,
&efivar_attr_raw_var.attr,
NULL,
};
static struct kobj_type efivar_ktype = {
.release = efivar_release,
.sysfs_ops = &efivar_attr_ops,
.default_attrs = def_attrs,
};
static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
struct efi_variable *new_var = (struct efi_variable *)buf;
struct efivar_entry *new_entry;
bool need_compat = in_compat_syscall();
efi_char16_t *name;
unsigned long size;
u32 attributes;
u8 *data;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (need_compat) {
if (count != sizeof(*compat))
return -EINVAL;
attributes = compat->Attributes;
name = compat->VariableName;
size = compat->DataSize;
data = compat->Data;
} else {
if (count != sizeof(*new_var))
return -EINVAL;
attributes = new_var->Attributes;
name = new_var->VariableName;
size = new_var->DataSize;
data = new_var->Data;
}
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
efivar_validate(new_var->VendorGuid, name, data,
size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
if (need_compat)
copy_out_compat(&new_entry->var, compat);
else
memcpy(&new_entry->var, new_var, sizeof(*new_var));
err = efivar_entry_set(new_entry, attributes, size,
data, &efivar_sysfs_list);
if (err) {
if (err == -EEXIST)
err = -EINVAL;
goto out;
}
if (efivar_create_sysfs_entry(new_entry)) {
printk(KERN_WARNING "efivars: failed to create sysfs entry.\n");
kfree(new_entry);
}
return count;
out:
kfree(new_entry);
return err;
}
static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct efi_variable *del_var = (struct efi_variable *)buf;
struct compat_efi_variable *compat;
struct efivar_entry *entry;
efi_char16_t *name;
efi_guid_t vendor;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (in_compat_syscall()) {
if (count != sizeof(*compat))
return -EINVAL;
compat = (struct compat_efi_variable *)buf;
name = compat->VariableName;
vendor = compat->VendorGuid;
} else {
if (count != sizeof(*del_var))
return -EINVAL;
name = del_var->VariableName;
vendor = del_var->VendorGuid;
}
if (efivar_entry_iter_begin())
return -EINTR;
entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
if (!entry)
err = -EINVAL;
else if (__efivar_entry_delete(entry))
err = -EIO;
efivars, efi-pstore: Hold off deletion of sysfs entry until the scan is completed Currently, when mounting pstore file system, a read callback of efi_pstore driver runs mutiple times as below. - In the first read callback, scan efivar_sysfs_list from head and pass a kmsg buffer of a entry to an upper pstore layer. - In the second read callback, rescan efivar_sysfs_list from the entry and pass another kmsg buffer to it. - Repeat the scan and pass until the end of efivar_sysfs_list. In this process, an entry is read across the multiple read function calls. To avoid race between the read and erasion, the whole process above is protected by a spinlock, holding in open() and releasing in close(). At the same time, kmemdup() is called to pass the buffer to pstore filesystem during it. And then, it causes a following lockdep warning. To make the dynamic memory allocation runnable without taking spinlock, holding off a deletion of sysfs entry if it happens while scanning it via efi_pstore, and deleting it after the scan is completed. To implement it, this patch introduces two flags, scanning and deleting, to efivar_entry. On the code basis, it seems that all the scanning and deleting logic is not needed because __efivars->lock are not dropped when reading from the EFI variable store. But, the scanning and deleting logic is still needed because an efi-pstore and a pstore filesystem works as follows. In case an entry(A) is found, the pointer is saved to psi->data. And efi_pstore_read() passes the entry(A) to a pstore filesystem by releasing __efivars->lock. And then, the pstore filesystem calls efi_pstore_read() again and the same entry(A), which is saved to psi->data, is used for resuming to scan a sysfs-list. So, to protect the entry(A), the logic is needed. [ 1.143710] ------------[ cut here ]------------ [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740 lockdep_trace_alloc+0x104/0x110() [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) [ 1.144058] Modules linked in: [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2 [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5 ffff8800797e9b28 [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080 0000000000000046 [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0 ffff8800797e9b78 [ 1.144058] Call Trace: [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74 [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0 [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50 [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0 [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110 [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280 [ 1.144058] [<ffffffff815147bb>] ? efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50 [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff81514800>] ? efi_pstore_read_func.part.1+0x170/0x170 [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0 [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120 [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50 [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150 [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20 [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80 [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0 [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0 [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20 [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0 [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20 [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0 [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20 [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0 [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0 [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]--- Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Tested-by: Madper Xie <cxie@redhat.com> Cc: stable@kernel.org Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2013-10-31 03:27:26 +08:00
if (err) {
efivar_entry_iter_end();
return err;
efivars, efi-pstore: Hold off deletion of sysfs entry until the scan is completed Currently, when mounting pstore file system, a read callback of efi_pstore driver runs mutiple times as below. - In the first read callback, scan efivar_sysfs_list from head and pass a kmsg buffer of a entry to an upper pstore layer. - In the second read callback, rescan efivar_sysfs_list from the entry and pass another kmsg buffer to it. - Repeat the scan and pass until the end of efivar_sysfs_list. In this process, an entry is read across the multiple read function calls. To avoid race between the read and erasion, the whole process above is protected by a spinlock, holding in open() and releasing in close(). At the same time, kmemdup() is called to pass the buffer to pstore filesystem during it. And then, it causes a following lockdep warning. To make the dynamic memory allocation runnable without taking spinlock, holding off a deletion of sysfs entry if it happens while scanning it via efi_pstore, and deleting it after the scan is completed. To implement it, this patch introduces two flags, scanning and deleting, to efivar_entry. On the code basis, it seems that all the scanning and deleting logic is not needed because __efivars->lock are not dropped when reading from the EFI variable store. But, the scanning and deleting logic is still needed because an efi-pstore and a pstore filesystem works as follows. In case an entry(A) is found, the pointer is saved to psi->data. And efi_pstore_read() passes the entry(A) to a pstore filesystem by releasing __efivars->lock. And then, the pstore filesystem calls efi_pstore_read() again and the same entry(A), which is saved to psi->data, is used for resuming to scan a sysfs-list. So, to protect the entry(A), the logic is needed. [ 1.143710] ------------[ cut here ]------------ [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740 lockdep_trace_alloc+0x104/0x110() [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) [ 1.144058] Modules linked in: [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2 [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5 ffff8800797e9b28 [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080 0000000000000046 [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0 ffff8800797e9b78 [ 1.144058] Call Trace: [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74 [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0 [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50 [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0 [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110 [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280 [ 1.144058] [<ffffffff815147bb>] ? efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50 [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff81514800>] ? efi_pstore_read_func.part.1+0x170/0x170 [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0 [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120 [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50 [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150 [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20 [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80 [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0 [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0 [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20 [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0 [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20 [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0 [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20 [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0 [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0 [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]--- Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Tested-by: Madper Xie <cxie@redhat.com> Cc: stable@kernel.org Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2013-10-31 03:27:26 +08:00
}
efivars, efi-pstore: Hold off deletion of sysfs entry until the scan is completed Currently, when mounting pstore file system, a read callback of efi_pstore driver runs mutiple times as below. - In the first read callback, scan efivar_sysfs_list from head and pass a kmsg buffer of a entry to an upper pstore layer. - In the second read callback, rescan efivar_sysfs_list from the entry and pass another kmsg buffer to it. - Repeat the scan and pass until the end of efivar_sysfs_list. In this process, an entry is read across the multiple read function calls. To avoid race between the read and erasion, the whole process above is protected by a spinlock, holding in open() and releasing in close(). At the same time, kmemdup() is called to pass the buffer to pstore filesystem during it. And then, it causes a following lockdep warning. To make the dynamic memory allocation runnable without taking spinlock, holding off a deletion of sysfs entry if it happens while scanning it via efi_pstore, and deleting it after the scan is completed. To implement it, this patch introduces two flags, scanning and deleting, to efivar_entry. On the code basis, it seems that all the scanning and deleting logic is not needed because __efivars->lock are not dropped when reading from the EFI variable store. But, the scanning and deleting logic is still needed because an efi-pstore and a pstore filesystem works as follows. In case an entry(A) is found, the pointer is saved to psi->data. And efi_pstore_read() passes the entry(A) to a pstore filesystem by releasing __efivars->lock. And then, the pstore filesystem calls efi_pstore_read() again and the same entry(A), which is saved to psi->data, is used for resuming to scan a sysfs-list. So, to protect the entry(A), the logic is needed. [ 1.143710] ------------[ cut here ]------------ [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740 lockdep_trace_alloc+0x104/0x110() [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) [ 1.144058] Modules linked in: [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2 [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5 ffff8800797e9b28 [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080 0000000000000046 [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0 ffff8800797e9b78 [ 1.144058] Call Trace: [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74 [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0 [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50 [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0 [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110 [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280 [ 1.144058] [<ffffffff815147bb>] ? efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50 [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170 [ 1.144058] [<ffffffff81514800>] ? efi_pstore_read_func.part.1+0x170/0x170 [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0 [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120 [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50 [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150 [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20 [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80 [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0 [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0 [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20 [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0 [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20 [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0 [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20 [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0 [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0 [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]--- Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Tested-by: Madper Xie <cxie@redhat.com> Cc: stable@kernel.org Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2013-10-31 03:27:26 +08:00
if (!entry->scanning) {
efivar_entry_iter_end();
efivar_unregister(entry);
} else
efivar_entry_iter_end();
/* It's dead Jim.... */
return count;
}
/**
* efivar_create_sysfs_entry - create a new entry in sysfs
* @new_var: efivar entry to create
*
* Returns 0 on success, negative error code on failure
*/
static int
efivar_create_sysfs_entry(struct efivar_entry *new_var)
{
int short_name_size;
char *short_name;
unsigned long utf8_name_size;
efi_char16_t *variable_name = new_var->var.VariableName;
int ret;
/*
* Length of the variable bytes in UTF8, plus the '-' separator,
* plus the GUID, plus trailing NUL
*/
utf8_name_size = ucs2_utf8size(variable_name);
short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1;
short_name = kmalloc(short_name_size, GFP_KERNEL);
if (!short_name)
return -ENOMEM;
ucs2_as_utf8(short_name, variable_name, short_name_size);
/* This is ugly, but necessary to separate one vendor's
private variables from another's. */
short_name[utf8_name_size] = '-';
efi_guid_to_str(&new_var->var.VendorGuid,
short_name + utf8_name_size + 1);
new_var->kobj.kset = efivars_kset;
ret = kobject_init_and_add(&new_var->kobj, &efivar_ktype,
NULL, "%s", short_name);
kfree(short_name);
if (ret)
return ret;
kobject_uevent(&new_var->kobj, KOBJ_ADD);
if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
efivar_unregister(new_var);
return -EINTR;
}
return 0;
}
static int
create_efivars_bin_attributes(void)
{
struct bin_attribute *attr;
int error;
/* new_var */
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
attr->attr.name = "new_var";
attr->attr.mode = 0200;
attr->write = efivar_create;
efivars_new_var = attr;
/* del_var */
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr) {
error = -ENOMEM;
goto out_free;
}
attr->attr.name = "del_var";
attr->attr.mode = 0200;
attr->write = efivar_delete;
efivars_del_var = attr;
sysfs_bin_attr_init(efivars_new_var);
sysfs_bin_attr_init(efivars_del_var);
/* Register */
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_new_var);
if (error) {
printk(KERN_ERR "efivars: unable to create new_var sysfs file"
" due to error %d\n", error);
goto out_free;
}
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_del_var);
if (error) {
printk(KERN_ERR "efivars: unable to create del_var sysfs file"
" due to error %d\n", error);
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
goto out_free;
}
return 0;
out_free:
kfree(efivars_del_var);
efivars_del_var = NULL;
kfree(efivars_new_var);
efivars_new_var = NULL;
return error;
}
static int efivar_update_sysfs_entry(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry = data;
if (efivar_entry_find(name, vendor, &efivar_sysfs_list, false))
return 0;
memcpy(entry->var.VariableName, name, name_size);
memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
return 1;
}
static void efivar_update_sysfs_entries(struct work_struct *work)
{
struct efivar_entry *entry;
int err;
/* Add new sysfs entries */
while (1) {
efivar: fix oops in efivar_update_sysfs_entries() caused by memory reuse The loop in efivar_update_sysfs_entries() reuses the same allocation for entries each time it calls efivar_create_sysfs_entry(entry). This is wrong because efivar_create_sysfs_entry() expects to keep the memory it was passed, so the caller may not free it (and may not pass the same memory in multiple times). This leads to the oops below. Fix by getting a new allocation each time we go around the loop. ---[ end trace ba4907d5c519d111 ]--- BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffff8142f81f>] efivar_entry_find+0x14f/0x2d0 PGD 0 Oops: 0000 [#2] SMP Modules linked in: oops(OF+) ebtable_nat ebtables xt_CHECKSUM [...] CPU: 0 PID: 301 Comm: kworker/0:2 Tainted: GF D O 3.9.0+ #1 Hardware name: LENOVO 4291EV7/4291EV7, BIOS 8DET52WW (1.22 ) 09/15/2011 Workqueue: events efivar_update_sysfs_entries task: ffff8801955920c0 ti: ffff88019413e000 task.ti: ffff88019413e000 RIP: 0010:[<ffffffff8142f81f>] [<ffffffff8142f81f>] efivar_entry_find+0x14f/0x2d0 RSP: 0018:ffff88019413fa48 EFLAGS: 00010006 RAX: 0000000000000000 RBX: ffff880195d87c00 RCX: ffffffff81ab6f60 RDX: ffff88019413fb88 RSI: 0000000000000400 RDI: ffff880196254000 RBP: ffff88019413fbd8 R08: 0000000000000000 R09: ffff8800dad99037 R10: ffff880195d87c00 R11: 0000000000000430 R12: ffffffff81ab6f60 R13: fffffffffffff7d8 R14: ffff880196254000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88019e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000001a0b000 CR4: 00000000000407f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff88019413fb78 ffff88019413fb88 ffffffff81e85d60 03000000972b5c00 ffff88019413fa29 ffffffff81e85d60 ffff88019413fbfb 0000000197087280 00000000000000fe 0000000000000001 ffffffff81e85dd9 ffff880197087280 Call Trace: [<ffffffff81254371>] ? idr_get_empty_slot+0x131/0x240 [<ffffffff8125b6d2>] ? put_dec+0x72/0x90 [<ffffffff81158e40>] ? cache_alloc_refill+0x170/0x2f0 [<ffffffff81430420>] efivar_update_sysfs_entry+0x150/0x220 [<ffffffff8103dd29>] ? efi_call2+0x9/0x70 [<ffffffff8103d787>] ? virt_efi_get_next_variable+0x47/0x1b0 [<ffffffff8115a8df>] ? kmem_cache_alloc_trace+0x1af/0x1c0 [<ffffffff81430033>] efivar_init+0x2c3/0x380 [<ffffffff814302d0>] ? efivar_delete+0xd0/0xd0 [<ffffffff8143111f>] efivar_update_sysfs_entries+0x6f/0x90 [<ffffffff810605f3>] process_one_work+0x183/0x490 [<ffffffff81061780>] worker_thread+0x120/0x3a0 [<ffffffff81061660>] ? manage_workers+0x160/0x160 [<ffffffff8106752e>] kthread+0xce/0xe0 [<ffffffff81067460>] ? kthread_freezable_should_stop+0x70/0x70 [<ffffffff81543c5c>] ret_from_fork+0x7c/0xb0 [<ffffffff81067460>] ? kthread_freezable_should_stop+0x70/0x70 Code: 8d 55 b0 48 8d 45 a0 49 81 ed 28 08 00 00 48 89 95 78 fe [...] RIP [<ffffffff8142f81f>] efivar_entry_find+0x14f/0x2d0 RSP <ffff88019413fa48> CR2: 0000000000000000 ---[ end trace ba4907d5c519d112 ]--- Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Tomoki Sekiyama <tomoki.sekiyama@hds.com> Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2013-05-11 04:45:36 +08:00
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
err = efivar_init(efivar_update_sysfs_entry, entry,
false, &efivar_sysfs_list);
if (!err)
break;
efivar_create_sysfs_entry(entry);
}
kfree(entry);
}
static int efivars_sysfs_callback(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
memcpy(entry->var.VariableName, name, name_size);
memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
efivar_create_sysfs_entry(entry);
return 0;
}
static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
{
int err = efivar_entry_remove(entry);
if (err)
return err;
efivar_unregister(entry);
return 0;
}
static void efivars_sysfs_exit(void)
{
/* Remove all entries and destroy */
int err;
err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list,
NULL, NULL);
if (err) {
pr_err("efivars: Failed to destroy sysfs entries\n");
return;
}
if (efivars_new_var)
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
if (efivars_del_var)
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_del_var);
kfree(efivars_new_var);
kfree(efivars_del_var);
kset_unregister(efivars_kset);
}
int efivars_sysfs_init(void)
{
struct kobject *parent_kobj = efivars_kobject();
int error = 0;
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return -ENODEV;
/* No efivars has been registered yet */
if (!parent_kobj)
return 0;
printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION,
EFIVARS_DATE);
efivars_kset = kset_create_and_add("vars", NULL, parent_kobj);
if (!efivars_kset) {
printk(KERN_ERR "efivars: Subsystem registration failed.\n");
return -ENOMEM;
}
efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
error = create_efivars_bin_attributes();
if (error) {
efivars_sysfs_exit();
return error;
}
INIT_WORK(&efivar_work, efivar_update_sysfs_entries);
return 0;
}
EXPORT_SYMBOL_GPL(efivars_sysfs_init);
module_init(efivars_sysfs_init);
module_exit(efivars_sysfs_exit);