2019-05-29 22:17:56 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2015-01-17 08:01:10 +08:00
|
|
|
/*
|
|
|
|
* Copyright 2014 Google, Inc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/cdev.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include "internal.h"
|
|
|
|
|
pstore: Revert pmsg_lock back to a normal mutex
This reverts commit 76d62f24db07f22ccf9bc18ca793c27d4ebef721.
So while priority inversion on the pmsg_lock is an occasional
problem that an rt_mutex would help with, in uses where logging
is writing to pmsg heavily from multiple threads, the pmsg_lock
can be heavily contended.
After this change landed, it was reported that cases where the
mutex locking overhead was commonly adding on the order of 10s
of usecs delay had suddenly jumped to ~msec delay with rtmutex.
It seems the slight differences in the locks under this level
of contention causes the normal mutexes to utilize the spinning
optimizations, while the rtmutexes end up in the sleeping
slowpath (which allows additional threads to pile on trying
to take the lock).
In this case, it devolves to a worst-case scenario where the lock
acquisition and scheduling overhead dominates, and each thread
is waiting on the order of ~ms to do ~us of work.
Obviously, having tons of threads all contending on a single
lock for logging is non-optimal, so the proper fix is probably
reworking pstore pmsg to have per-cpu buffers so we don't have
contention.
Additionally, Steven Rostedt has provided some further
optimizations for rtmutexes that improves the rtmutex spinning
path, but at least in my testing, I still see the test tripping
into the sleeping path on rtmutexes while utilizing the spinning
path with mutexes.
But in the short term, let's revert the change to the rt_mutex
and go back to normal mutexes to avoid a potentially major
performance regression. And we can work on optimizations to both
rtmutexes and finer-grained locking for pstore pmsg in the
future.
Cc: Wei Wang <wvw@google.com>
Cc: Midas Chien<midaschieh@google.com>
Cc: "Chunhui Li (李春辉)" <chunhui.li@mediatek.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: kernel-team@android.com
Fixes: 76d62f24db07 ("pstore: Switch pmsg_lock to an rt_mutex to avoid priority inversion")
Reported-by: "Chunhui Li (李春辉)" <chunhui.li@mediatek.com>
Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230308204043.2061631-1-jstultz@google.com
2023-03-09 04:40:43 +08:00
|
|
|
/* Serializes the psinfo->write_user() call made from write_pmsg(). */
static DEFINE_MUTEX(pmsg_lock);
|
2015-01-17 08:01:10 +08:00
|
|
|
|
|
|
|
/*
 * write_pmsg() - write handler installed in pmsg_fops.
 * @file:  open file the write was issued on (not used here)
 * @buf:   user-space buffer holding the message
 * @count: number of bytes in @buf
 * @ppos:  file position (not used here)
 *
 * Builds a PSTORE_TYPE_PMSG record of @count bytes and passes it, together
 * with the user buffer, to psinfo->write_user() while holding pmsg_lock.
 *
 * Return: 0 for a zero-length write, -EFAULT if access_ok() rejects the
 * user range, the non-zero value returned by write_user() if it reports
 * one, otherwise @count.
 */
static ssize_t write_pmsg(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	struct pstore_record rec;
	int err;

	/* Nothing to store for an empty write. */
	if (count == 0)
		return 0;

	pstore_record_init(&rec, psinfo);
	rec.type = PSTORE_TYPE_PMSG;
	rec.size = count;

	/*
	 * Validate the user range before taking pmsg_lock; write_user is
	 * expected to perform its own check as well.
	 */
	if (!access_ok(buf, count))
		return -EFAULT;

	mutex_lock(&pmsg_lock);
	err = psinfo->write_user(&rec, buf);
	mutex_unlock(&pmsg_lock);

	if (err)
		return err;

	return count;
}
|
|
|
|
|
|
|
|
static const struct file_operations pmsg_fops = {
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.llseek = noop_llseek,
|
|
|
|
.write = write_pmsg,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Major number returned by register_chrdev() in pstore_register_pmsg(). */
static int pmsg_major;
/* Device class created by class_create() in pstore_register_pmsg(). */
static struct class *pmsg_class;

/* Name used for the chrdev registration, the class and the device node. */
#define PMSG_NAME "pmsg"

/* Prefix the pr_*() messages below this point with "pmsg: ". */
#undef pr_fmt
#define pr_fmt(fmt) PMSG_NAME ": " fmt
|
|
|
|
|
2022-11-23 20:25:20 +08:00
|
|
|
/*
 * pmsg_devnode() - devnode callback assigned to pmsg_class.
 * @dev:  device the node is being created for (not used here)
 * @mode: optional output for the node's permission bits; may be NULL
 *
 * When @mode is supplied, it is set to 0220 (write permission for owner
 * and group only).
 *
 * Return: always NULL. Presumably this leaves the node name to the
 * caller's default; confirm against the driver core.
 */
static char *pmsg_devnode(const struct device *dev, umode_t *mode)
{
	if (mode != NULL)
		*mode = 0220;

	return NULL;
}
|
|
|
|
|
|
|
|
void pstore_register_pmsg(void)
|
|
|
|
{
|
|
|
|
struct device *pmsg_device;
|
|
|
|
|
|
|
|
pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops);
|
|
|
|
if (pmsg_major < 0) {
|
|
|
|
pr_err("register_chrdev failed\n");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2023-03-14 02:18:35 +08:00
|
|
|
pmsg_class = class_create(PMSG_NAME);
|
2015-01-17 08:01:10 +08:00
|
|
|
if (IS_ERR(pmsg_class)) {
|
|
|
|
pr_err("device class file already in use\n");
|
|
|
|
goto err_class;
|
|
|
|
}
|
|
|
|
pmsg_class->devnode = pmsg_devnode;
|
|
|
|
|
|
|
|
pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0),
|
|
|
|
NULL, "%s%d", PMSG_NAME, 0);
|
|
|
|
if (IS_ERR(pmsg_device)) {
|
|
|
|
pr_err("failed to create device\n");
|
|
|
|
goto err_device;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
|
|
|
|
err_device:
|
|
|
|
class_destroy(pmsg_class);
|
|
|
|
err_class:
|
|
|
|
unregister_chrdev(pmsg_major, PMSG_NAME);
|
|
|
|
err:
|
|
|
|
return;
|
|
|
|
}
|
2015-10-20 15:39:03 +08:00
|
|
|
|
|
|
|
void pstore_unregister_pmsg(void)
|
|
|
|
{
|
|
|
|
device_destroy(pmsg_class, MKDEV(pmsg_major, 0));
|
|
|
|
class_destroy(pmsg_class);
|
|
|
|
unregister_chrdev(pmsg_major, PMSG_NAME);
|
|
|
|
}
|