From fdd2c1f4e2c2e2fee1d95d18b4b06bc0d15c368a Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:14 +0100 Subject: [PATCH 01/24] um: synchronize kmsg_dumper The kmsg_dumper can be called from any context and CPU, possibly from multiple CPUs simultaneously. Since a static buffer is used to retrieve the kernel logs, this buffer must be protected against simultaneous dumping. Skip dumping if another context is already dumping. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-2-john.ogness@linutronix.de --- arch/um/kernel/kmsg_dump.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 6516ef1f8274..a765d235e50e 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + static DEFINE_SPINLOCK(lock); static char line[1024]; struct console *con; + unsigned long flags; size_t len = 0; /* only dump kmsg when no console is available */ @@ -29,11 +32,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, if (con) return; + if (!spin_trylock_irqsave(&lock, flags)) + return; + printf("kmsg_dump:\n"); while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { line[len] = '\0'; printf("%s", line); } + + spin_unlock_irqrestore(&lock, flags); } static struct kmsg_dumper kmsg_dumper = { From 40ddbbac7f16eb91f9f2bbc414f283102052ce14 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:15 +0100 Subject: [PATCH 02/24] mtd: mtdoops: synchronize kmsg_dumper The kmsg_dumper can be called from any context and CPU, possibly from multiple CPUs simultaneously. Since the writing of the buffer can occur from a later scheduled work queue, the oops buffer must be protected against simultaneous dumping. Use an atomic bit to mark when the buffer is protected. Release the protection in between setting the buffer and the actual writing in order for a possible panic (immediate write) to be written during the scheduling of a previous oops (delayed write). An atomic bit (rather than a spinlock) was chosen so that no scheduling or preemption side-effects would be introduced. The MTD kmsg_dumper may dump directly or it may be delayed (via scheduled work). Depending on the context, different MTD callbacks are used. For example, mtd_write() expects to be called in a non-atomic context and may take a mutex. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-3-john.ogness@linutronix.de --- drivers/mtd/mtdoops.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 774970bfcf85..8bbfba40a554 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -52,6 +52,7 @@ static struct mtdoops_context { int nextcount; unsigned long *oops_page_used; + unsigned long oops_buf_busy; void *oops_buf; } oops_cxt; @@ -180,6 +181,9 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic) u32 *hdr; int ret; + if (test_and_set_bit(0, &cxt->oops_buf_busy)) + return; + /* Add mtdoops header to the buffer */ hdr = cxt->oops_buf; hdr[0] = cxt->nextcount; @@ -190,7 +194,7 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic) record_size, &retlen, cxt->oops_buf); if (ret == -EOPNOTSUPP) { printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n"); - return; + goto out; } } else ret = mtd_write(mtd, cxt->nextpage * record_size, @@ -203,6 +207,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic) memset(cxt->oops_buf, 0xff, record_size); mtdoops_inc_counter(cxt); +out: + clear_bit(0, &cxt->oops_buf_busy); } static void mtdoops_workfunc_write(struct work_struct *work) @@ -276,8 +282,11 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, if (reason == KMSG_DUMP_OOPS && !dump_oops) return; + if (test_and_set_bit(0, &cxt->oops_buf_busy)) + return; kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, record_size - MTDOOPS_HEADER_SIZE, NULL); + clear_bit(0, &cxt->oops_buf_busy); if (reason != KMSG_DUMP_OOPS) { /* Panics must be written immediately */ @@ -394,6 +403,7 @@ static int __init mtdoops_init(void) return -ENOMEM; } memset(cxt->oops_buf, 0xff, record_size); + cxt->oops_buf_busy = 0; INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); From bb07b16c44b2c6ddbafa44bb06454719002e828e Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:16 +0100 Subject: [PATCH 03/24] printk: limit second loop of syslog_print_all The second loop of syslog_print_all() subtracts lengths that were added in the first loop. With commit b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") it is possible that records are (over)written during syslog_print_all(). This allows the possibility of the second loop subtracting lengths that were never added in the first loop. This situation can result in syslog_print_all() filling the buffer starting from a later record, even though there may have been room to fit the earlier record(s) as well. Fixes: b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-4-john.ogness@linutronix.de --- kernel/printk/printk.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 575a34b88936..77ae2704e979 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1494,6 +1494,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) struct printk_info info; unsigned int line_count; struct printk_record r; + u64 max_seq; char *text; int len = 0; u64 seq; @@ -1512,9 +1513,15 @@ static int syslog_print_all(char __user *buf, int size, bool clear) prb_for_each_info(clear_seq, prb, seq, &info, &line_count) len += get_record_print_text_size(&info, line_count, true, time); + /* + * Set an upper bound for the next loop to avoid subtracting lengths + * that were never added. + */ + max_seq = seq; + /* move first record forward until length fits into the buffer */ prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { - if (len <= size) + if (len <= size || info.seq >= max_seq) break; len -= get_record_print_text_size(&info, line_count, true, time); } From e831e400f73b0e7aa4629200c2ef71e068262414 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:17 +0100 Subject: [PATCH 04/24] printk: kmsg_dump: remove unused fields struct kmsg_dumper still contains some fields that were used to iterate the old ringbuffer. They are no longer used. Remove them and update the struct documentation. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-5-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 3378bcbe585e..ae38035f1dca 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -36,6 +36,9 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered + * @active: Flag that specifies if this is currently dumping + * @cur_seq: Points to the oldest message to dump + * @next_seq: Points after the newest message to dump */ struct kmsg_dumper { struct list_head list; @@ -45,8 +48,6 @@ struct kmsg_dumper { bool registered; /* private state of the kmsg iterator */ - u32 cur_idx; - u32 next_idx; u64 cur_seq; u64 next_seq; }; From 726b5097701a8d46f5354be780e1a11fc4ca1187 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:18 +0100 Subject: [PATCH 05/24] printk: refactor kmsg_dump_get_buffer() kmsg_dump_get_buffer() requires nearly the same logic as syslog_print_all(), but uses different variable names and does not make use of the ringbuffer loop macros. Modify kmsg_dump_get_buffer() so that the implementation is as similar to syslog_print_all() as possible. A follow-up commit will move this common logic into a separate helper function. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-6-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 2 +- kernel/printk/printk.c | 62 +++++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index ae38035f1dca..070c994ff19f 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len); + char *buf, size_t size, size_t *len_out); void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 77ae2704e979..ed678d84dc51 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3410,7 +3410,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer - * @len: length of line placed into buffer + * @len_out: length of line placed into buffer * * Start at the end of the kmsg buffer and fill the provided buffer * with as many of the *youngest* kmsg records that fit into it. @@ -3424,7 +3424,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); * read. */ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) + char *buf, size_t size, size_t *len_out) { struct printk_info info; unsigned int line_count; @@ -3432,12 +3432,10 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, unsigned long flags; u64 seq; u64 next_seq; - size_t l = 0; + size_t len = 0; bool ret = false; bool time = printk_time; - prb_rec_init_rd(&r, &info, buf, size); - if (!dumper->active || !buf || !size) goto out; @@ -3455,48 +3453,54 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; } - /* calculate length of entire buffer */ - seq = dumper->cur_seq; - while (prb_read_valid_info(prb, seq, &info, &line_count)) { - if (r.info->seq >= dumper->next_seq) + /* + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. + */ + + prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { + if (info.seq >= dumper->next_seq) break; - l += get_record_print_text_size(&info, line_count, syslog, time); - seq = r.info->seq + 1; + len += get_record_print_text_size(&info, line_count, syslog, time); } - /* move first record forward until length fits into the buffer */ - seq = dumper->cur_seq; - while (l >= size && prb_read_valid_info(prb, seq, - &info, &line_count)) { - if (r.info->seq >= dumper->next_seq) + /* + * Move first record forward until length fits into the buffer. Ignore + * newest messages that were not counted in the above cycle. Messages + * might appear and get lost in the meantime. This is the best effort + * that prevents an infinite loop. + */ + prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { + if (len < size || info.seq >= dumper->next_seq) break; - l -= get_record_print_text_size(&info, line_count, syslog, time); - seq = r.info->seq + 1; + len -= get_record_print_text_size(&info, line_count, syslog, time); } - /* last message in next interation */ + /* + * Next kmsg_dump_get_buffer() invocation will dump block of + * older records stored right before this one. + */ next_seq = seq; - /* actually read text into the buffer now */ - l = 0; - while (prb_read_valid(prb, seq, &r)) { + prb_rec_init_rd(&r, &info, buf, size); + + len = 0; + prb_for_each_record(seq, prb, seq, &r) { if (r.info->seq >= dumper->next_seq) break; - l += record_print_text(&r, syslog, time); + len += record_print_text(&r, syslog, time); - /* adjust record to store to remaining buffer space */ - prb_rec_init_rd(&r, &info, buf + l, size - l); - - seq = r.info->seq + 1; + /* Adjust record to store to remaining buffer space. */ + prb_rec_init_rd(&r, &info, buf + len, size - len); } dumper->next_seq = next_seq; ret = true; logbuf_unlock_irqrestore(flags); out: - if (len) - *len = l; + if (len_out) + *len_out = len; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); From 4260e0e5510158d704898603331e5365ebe957de Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:19 +0100 Subject: [PATCH 06/24] printk: consolidate kmsg_dump_get_buffer/syslog_print_all code The logic for finding records to fit into a buffer is the same for kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper function find_first_fitting_seq() to handle this logic. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-7-john.ogness@linutronix.de --- kernel/printk/printk.c | 87 ++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ed678d84dc51..9a5f9ccc46ea 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1421,6 +1421,50 @@ static size_t get_record_print_text_size(struct printk_info *info, return ((prefix_len * line_count) + info->text_len + 1); } +/* + * Beginning with @start_seq, find the first record where it and all following + * records up to (but not including) @max_seq fit into @size. + * + * @max_seq is simply an upper bound and does not need to exist. If the caller + * does not require an upper bound, -1 can be used for @max_seq. + */ +static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, + bool syslog, bool time) +{ + struct printk_info info; + unsigned int line_count; + size_t len = 0; + u64 seq; + + /* Determine the size of the records up to @max_seq. */ + prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + if (info.seq >= max_seq) + break; + len += get_record_print_text_size(&info, line_count, syslog, time); + } + + /* + * Adjust the upper bound for the next loop to avoid subtracting + * lengths that were never added. + */ + if (seq < max_seq) + max_seq = seq; + + /* + * Move first record forward until length fits into the buffer. Ignore + * newest messages that were not counted in the above cycle. Messages + * might appear and get lost in the meantime. This is a best effort + * that prevents an infinite loop that could occur with a retry. + */ + prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + if (len <= size || info.seq >= max_seq) + break; + len -= get_record_print_text_size(&info, line_count, syslog, time); + } + + return seq; +} + static int syslog_print(char __user *buf, int size) { struct printk_info info; @@ -1492,9 +1536,7 @@ static int syslog_print(char __user *buf, int size) static int syslog_print_all(char __user *buf, int size, bool clear) { struct printk_info info; - unsigned int line_count; struct printk_record r; - u64 max_seq; char *text; int len = 0; u64 seq; @@ -1510,21 +1552,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) * Find first record that fits, including all following records, * into the user-provided buffer for this dump. */ - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) - len += get_record_print_text_size(&info, line_count, true, time); - - /* - * Set an upper bound for the next loop to avoid subtracting lengths - * that were never added. - */ - max_seq = seq; - - /* move first record forward until length fits into the buffer */ - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { - if (len <= size || info.seq >= max_seq) - break; - len -= get_record_print_text_size(&info, line_count, true, time); - } + seq = find_first_fitting_seq(clear_seq, -1, size, true, time); prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); @@ -3427,7 +3455,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len_out) { struct printk_info info; - unsigned int line_count; struct printk_record r; unsigned long flags; u64 seq; @@ -3455,26 +3482,12 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, /* * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. + * into the user-provided buffer for this dump. Pass in size-1 + * because this function (by way of record_print_text()) will + * not write more than size-1 bytes of text into @buf. */ - - prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { - if (info.seq >= dumper->next_seq) - break; - len += get_record_print_text_size(&info, line_count, syslog, time); - } - - /* - * Move first record forward until length fits into the buffer. Ignore - * newest messages that were not counted in the above cycle. Messages - * might appear and get lost in the meantime. This is the best effort - * that prevents an infinite loop. - */ - prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { - if (len < size || info.seq >= dumper->next_seq) - break; - len -= get_record_print_text_size(&info, line_count, syslog, time); - } + seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, + size - 1, syslog, time); /* * Next kmsg_dump_get_buffer() invocation will dump block of From cf5b0208fda4602d0ef383a86e942fb3dcb8852b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:20 +0100 Subject: [PATCH 07/24] printk: introduce CONSOLE_LOG_MAX Instead of using "LOG_LINE_MAX + PREFIX_MAX" for temporary buffer sizes, introduce CONSOLE_LOG_MAX. This represents the maximum size that is allowed to be printed to the console for a single record. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-8-john.ogness@linutronix.de --- kernel/printk/printk.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a5f9ccc46ea..2c8873fa2f29 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -410,7 +410,12 @@ static u64 clear_seq; #else #define PREFIX_MAX 32 #endif -#define LOG_LINE_MAX (1024 - PREFIX_MAX) + +/* the maximum size of a formatted record (i.e. with prefix added per line) */ +#define CONSOLE_LOG_MAX 1024 + +/* the maximum size allowed to be reserved for a record */ +#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) #define LOG_LEVEL(v) ((v) & 0x07) #define LOG_FACILITY(v) ((v) >> 3 & 0xff) @@ -1472,11 +1477,11 @@ static int syslog_print(char __user *buf, int size) char *text; int len = 0; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); if (!text) return -ENOMEM; - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); while (size > 0) { size_t n; @@ -1542,7 +1547,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) u64 seq; bool time; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); if (!text) return -ENOMEM; @@ -1554,7 +1559,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) */ seq = find_first_fitting_seq(clear_seq, -1, size, true, time); - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); len = 0; prb_for_each_record(seq, prb, seq, &r) { @@ -2187,8 +2192,7 @@ EXPORT_SYMBOL(printk); #else /* CONFIG_PRINTK */ -#define LOG_LINE_MAX 0 -#define PREFIX_MAX 0 +#define CONSOLE_LOG_MAX 0 #define printk_time false #define prb_read_valid(rb, seq, r) false @@ -2506,7 +2510,7 @@ static inline int can_use_console(void) void console_unlock(void) { static char ext_text[CONSOLE_EXT_LOG_MAX]; - static char text[LOG_LINE_MAX + PREFIX_MAX]; + static char text[CONSOLE_LOG_MAX]; unsigned long flags; bool do_cond_resched, retry; struct printk_info info; From 7d7a23a91c915f6a90b2a636b130c53e0fe5154c Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:21 +0100 Subject: [PATCH 08/24] printk: use seqcount_latch for clear_seq kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However, this is not done atomically. Since @clear_seq is 64-bit, this cannot be an atomic operation for all platforms. Therefore, use a seqcount_latch to allow readers to always read a consistent value. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-9-john.ogness@linutronix.de --- kernel/printk/printk.c | 58 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2c8873fa2f29..1b4bb88c3547 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -402,8 +402,21 @@ static u64 console_seq; static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -/* the next printk record to read after the last 'clear' command */ -static u64 clear_seq; +struct latched_seq { + seqcount_latch_t latch; + u64 val[2]; +}; + +/* + * The next printk record to read after the last 'clear' command. There are + * two copies (updated with seqcount_latch) so that reads can locklessly + * access a valid value. Writers are synchronized by @logbuf_lock. + */ +static struct latched_seq clear_seq = { + .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), + .val[0] = 0, + .val[1] = 0, +}; #ifdef CONFIG_PRINTK_CALLER #define PREFIX_MAX 48 @@ -457,6 +470,31 @@ bool printk_percpu_data_ready(void) return __printk_percpu_data_ready; } +/* Must be called under logbuf_lock. */ +static void latched_seq_write(struct latched_seq *ls, u64 val) +{ + raw_write_seqcount_latch(&ls->latch); + ls->val[0] = val; + raw_write_seqcount_latch(&ls->latch); + ls->val[1] = val; +} + +/* Can be called from any context. */ +static u64 latched_seq_read_nolock(struct latched_seq *ls) +{ + unsigned int seq; + unsigned int idx; + u64 val; + + do { + seq = raw_read_seqcount_latch(&ls->latch); + idx = seq & 0x1; + val = ls->val[idx]; + } while (read_seqcount_latch_retry(&ls->latch, seq)); + + return val; +} + /* Return log buffer address */ char *log_buf_addr_get(void) { @@ -801,7 +839,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. */ - user->seq = clear_seq; + user->seq = latched_seq_read_nolock(&clear_seq); break; case SEEK_END: /* after the last record */ @@ -960,6 +998,9 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_SIZE(atomic_long_t); VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); + + VMCOREINFO_STRUCT_SIZE(latched_seq); + VMCOREINFO_OFFSET(latched_seq, val); } #endif @@ -1557,7 +1598,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear) * Find first record that fits, including all following records, * into the user-provided buffer for this dump. */ - seq = find_first_fitting_seq(clear_seq, -1, size, true, time); + seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, + size, true, time); prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); @@ -1584,7 +1626,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) } if (clear) - clear_seq = seq; + latched_seq_write(&clear_seq, seq); logbuf_unlock_irq(); kfree(text); @@ -1594,7 +1636,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { logbuf_lock_irq(); - clear_seq = prb_next_seq(prb); + latched_seq_write(&clear_seq, prb_next_seq(prb)); logbuf_unlock_irq(); } @@ -3336,7 +3378,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) dumper->active = true; logbuf_lock_irqsave(flags); - dumper->cur_seq = clear_seq; + dumper->cur_seq = latched_seq_read_nolock(&clear_seq); dumper->next_seq = prb_next_seq(prb); logbuf_unlock_irqrestore(flags); @@ -3534,7 +3576,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); */ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) { - dumper->cur_seq = clear_seq; + dumper->cur_seq = latched_seq_read_nolock(&clear_seq); dumper->next_seq = prb_next_seq(prb); } From 35b2b1634849ac19b2dd28cae6c97b64c4299830 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:22 +0100 Subject: [PATCH 09/24] printk: use atomic64_t for devkmsg_user.seq @user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock is removed, @user->seq will be no longer safe from an atomicity point of view. In preparation for the removal of @logbuf_lock, change it to atomic64_t to provide this safety. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-10-john.ogness@linutronix.de --- kernel/printk/printk.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1b4bb88c3547..65e216ca6ca6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -662,7 +662,7 @@ out: /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { - u64 seq; + atomic64_t seq; struct ratelimit_state rs; struct mutex lock; char buf[CONSOLE_EXT_LOG_MAX]; @@ -763,7 +763,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, return ret; logbuf_lock_irq(); - if (!prb_read_valid(prb, user->seq, r)) { + if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; logbuf_unlock_irq(); @@ -772,15 +772,15 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_unlock_irq(); ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, user->seq, r)); + prb_read_valid(prb, atomic64_read(&user->seq), r)); if (ret) goto out; logbuf_lock_irq(); } - if (r->info->seq != user->seq) { + if (r->info->seq != atomic64_read(&user->seq)) { /* our last seen message is gone, return error and reset */ - user->seq = r->info->seq; + atomic64_set(&user->seq, r->info->seq); ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -791,7 +791,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, &r->text_buf[0], r->info->text_len, &r->info->dev_info); - user->seq = r->info->seq + 1; + atomic64_set(&user->seq, r->info->seq + 1); logbuf_unlock_irq(); if (len > count) { @@ -831,7 +831,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) switch (whence) { case SEEK_SET: /* the first record */ - user->seq = prb_first_valid_seq(prb); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); break; case SEEK_DATA: /* @@ -839,11 +839,11 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. */ - user->seq = latched_seq_read_nolock(&clear_seq); + atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); break; case SEEK_END: /* after the last record */ - user->seq = prb_next_seq(prb); + atomic64_set(&user->seq, prb_next_seq(prb)); break; default: ret = -EINVAL; @@ -864,9 +864,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid_info(prb, user->seq, &info, NULL)) { + if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ - if (info.seq != user->seq) + if (info.seq != atomic64_read(&user->seq)) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -905,7 +905,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) &user->text_buf[0], sizeof(user->text_buf)); logbuf_lock_irq(); - user->seq = prb_first_valid_seq(prb); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); logbuf_unlock_irq(); file->private_data = user; From 636babdc06961f173696cb664550b4cc239dde2e Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:23 +0100 Subject: [PATCH 10/24] printk: add syslog_lock The global variables @syslog_seq, @syslog_partial, @syslog_time and write access to @clear_seq are protected by @logbuf_lock. Once @logbuf_lock is removed, these variables will need their own synchronization method. Introduce @syslog_lock for this purpose. @syslog_lock is a raw_spin_lock for now. This simplifies the transition to removing @logbuf_lock. Once @logbuf_lock and the safe buffers are removed, @syslog_lock can change to spin_lock. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-11-john.ogness@linutronix.de --- kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 65e216ca6ca6..e794a08de00f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); printk_safe_exit_irqrestore(flags); \ } while (0) +/* syslog_lock protects syslog_* variables and write access to clear_seq. */ +static DEFINE_RAW_SPINLOCK(syslog_lock); + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +/* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; @@ -410,7 +414,7 @@ struct latched_seq { /* * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly - * access a valid value. Writers are synchronized by @logbuf_lock. + * access a valid value. Writers are synchronized by @syslog_lock. */ static struct latched_seq clear_seq = { .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), @@ -470,7 +474,7 @@ bool printk_percpu_data_ready(void) return __printk_percpu_data_ready; } -/* Must be called under logbuf_lock. */ +/* Must be called under syslog_lock. */ static void latched_seq_write(struct latched_seq *ls, u64 val) { raw_write_seqcount_latch(&ls->latch); @@ -1529,7 +1533,9 @@ static int syslog_print(char __user *buf, int size) size_t skip; logbuf_lock_irq(); + raw_spin_lock(&syslog_lock); if (!prb_read_valid(prb, syslog_seq, &r)) { + raw_spin_unlock(&syslog_lock); logbuf_unlock_irq(); break; } @@ -1559,6 +1565,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; + raw_spin_unlock(&syslog_lock); logbuf_unlock_irq(); if (!n) @@ -1625,8 +1632,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear) break; } - if (clear) + if (clear) { + raw_spin_lock(&syslog_lock); latched_seq_write(&clear_seq, seq); + raw_spin_unlock(&syslog_lock); + } logbuf_unlock_irq(); kfree(text); @@ -1636,10 +1646,24 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { logbuf_lock_irq(); + raw_spin_lock(&syslog_lock); latched_seq_write(&clear_seq, prb_next_seq(prb)); + raw_spin_unlock(&syslog_lock); logbuf_unlock_irq(); } +/* Return a consistent copy of @syslog_seq. */ +static u64 read_syslog_seq_irq(void) +{ + u64 seq; + + raw_spin_lock_irq(&syslog_lock); + seq = syslog_seq; + raw_spin_unlock_irq(&syslog_lock); + + return seq; +} + int do_syslog(int type, char __user *buf, int len, int source) { struct printk_info info; @@ -1663,8 +1687,9 @@ int do_syslog(int type, char __user *buf, int len, int source) return 0; if (!access_ok(buf, len)) return -EFAULT; + error = wait_event_interruptible(log_wait, - prb_read_valid(prb, syslog_seq, NULL)); + prb_read_valid(prb, read_syslog_seq_irq(), NULL)); if (error) return error; error = syslog_print(buf, len); @@ -1713,8 +1738,10 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); + raw_spin_lock(&syslog_lock); if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { /* No unread messages. */ + raw_spin_unlock(&syslog_lock); logbuf_unlock_irq(); return 0; } @@ -1743,6 +1770,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } + raw_spin_unlock(&syslog_lock); logbuf_unlock_irq(); break; /* Size of the log buffer */ @@ -2992,7 +3020,12 @@ void register_console(struct console *newcon) */ exclusive_console = newcon; exclusive_console_stop_seq = console_seq; + + /* Get a consistent copy of @syslog_seq. */ + raw_spin_lock(&syslog_lock); console_seq = syslog_seq; + raw_spin_unlock(&syslog_lock); + logbuf_unlock_irqrestore(flags); } console_unlock(); From 5f6c7648e556f41a3064bb6dceb9e102c50b618d Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:24 +0100 Subject: [PATCH 11/24] printk: kmsg_dumper: remove @active field All 6 kmsg_dumpers do not benefit from the @active flag: (provide their own synchronization) - arch/powerpc/kernel/nvram_64.c - arch/um/kernel/kmsg_dump.c - drivers/mtd/mtdoops.c - fs/pstore/platform.c (only dump on KMSG_DUMP_PANIC, which does not require synchronization) - arch/powerpc/platforms/powernv/opal-kmsg.c - drivers/hv/vmbus_drv.c The other 2 kmsg_dump users also do not rely on @active: (hard-code @active to always be true) - arch/powerpc/xmon/xmon.c - kernel/debug/kdb/kdb_main.c Therefore, @active can be removed. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-12-john.ogness@linutronix.de --- arch/powerpc/xmon/xmon.c | 2 +- include/linux/kmsg_dump.h | 2 -- kernel/debug/kdb/kdb_main.c | 2 +- kernel/printk/printk.c | 10 +--------- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dcd817ca2edf..a777573a0c38 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3005,7 +3005,7 @@ print_address(unsigned long addr) static void dump_log_buf(void) { - struct kmsg_dumper dumper = { .active = 1 }; + struct kmsg_dumper dumper; unsigned char buf[128]; size_t len; diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 070c994ff19f..84eaa2090efa 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -36,7 +36,6 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered - * @active: Flag that specifies if this is currently dumping * @cur_seq: Points to the oldest message to dump * @next_seq: Points after the newest message to dump */ @@ -44,7 +43,6 @@ struct kmsg_dumper { struct list_head list; void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); enum kmsg_dump_reason max_reason; - bool active; bool registered; /* private state of the kmsg iterator */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 930ac1b25ec7..315169d5e119 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const char **argv) int adjust = 0; int n = 0; int skip = 0; - struct kmsg_dumper dumper = { .active = 1 }; + struct kmsg_dumper dumper; size_t len; char buf[201]; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e794a08de00f..ce4cc64ba7c9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3408,8 +3408,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) continue; /* initialize iterator with data about the stored records */ - dumper->active = true; - logbuf_lock_irqsave(flags); dumper->cur_seq = latched_seq_read_nolock(&clear_seq); dumper->next_seq = prb_next_seq(prb); @@ -3417,9 +3415,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); - - /* reset iterator */ - dumper->active = false; } rcu_read_unlock(); } @@ -3454,9 +3449,6 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, prb_rec_init_rd(&r, &info, line, size); - if (!dumper->active) - goto out; - /* Read text or count text lines? */ if (line) { if (!prb_read_valid(prb, dumper->cur_seq, &r)) @@ -3542,7 +3534,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, bool ret = false; bool time = printk_time; - if (!dumper->active || !buf || !size) + if (!buf || !size) goto out; logbuf_lock_irqsave(flags); From f9f3f02db98bbe678a8e57fe9432b196174744a3 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:25 +0100 Subject: [PATCH 12/24] printk: introduce a kmsg_dump iterator Rather than storing the iterator information in the registered kmsg_dumper structure, create a separate iterator structure. The kmsg_dump_iter structure can reside on the stack of the caller, thus allowing lockless use of the kmsg_dump functions. Update code that accesses the kernel logs using the kmsg_dumper structure to use the new kmsg_dump_iter structure. For kmsg_dumpers, this also means adding a call to kmsg_dump_rewind() to initialize the iterator. All this is in preparation for removal of @logbuf_lock. Signed-off-by: John Ogness Reviewed-by: Kees Cook # pstore Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-13-john.ogness@linutronix.de --- arch/powerpc/kernel/nvram_64.c | 8 +++-- arch/powerpc/xmon/xmon.c | 6 ++-- arch/um/kernel/kmsg_dump.c | 5 ++- drivers/hv/vmbus_drv.c | 4 ++- drivers/mtd/mtdoops.c | 5 ++- fs/pstore/platform.c | 5 ++- include/linux/kmsg_dump.h | 36 ++++++++++--------- kernel/debug/kdb/kdb_main.c | 10 +++--- kernel/printk/printk.c | 63 +++++++++++++++++----------------- 9 files changed, 80 insertions(+), 62 deletions(-) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 532f22637783..3c8d9bbb51cf 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -647,6 +647,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, { struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; + static struct kmsg_dump_iter iter; static bool panicking = false; static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -681,13 +682,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, return; if (big_oops_buf) { - kmsg_dump_get_buffer(dumper, false, + kmsg_dump_rewind(&iter); + kmsg_dump_get_buffer(&iter, false, big_oops_buf, big_oops_buf_sz, &text_len); rc = zip_oops(text_len); } if (rc != 0) { - kmsg_dump_rewind(dumper); - kmsg_dump_get_buffer(dumper, false, + kmsg_dump_rewind(&iter); + kmsg_dump_get_buffer(&iter, false, oops_data, oops_data_sz, &text_len); err_type = ERR_TYPE_KERNEL_PANIC; oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a777573a0c38..dbcf403b1d8d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3005,7 +3005,7 @@ print_address(unsigned long addr) static void dump_log_buf(void) { - struct kmsg_dumper dumper; + struct kmsg_dump_iter iter; unsigned char buf[128]; size_t len; @@ -3017,9 +3017,9 @@ dump_log_buf(void) catch_memory_errors = 1; sync(); - kmsg_dump_rewind_nolock(&dumper); + kmsg_dump_rewind_nolock(&iter); xmon_start_pagination(); - while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) { + while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { buf[len] = '\0'; printf("%s", buf); } diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index a765d235e50e..0224fcb36e22 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -10,6 +10,7 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + static struct kmsg_dump_iter iter; static DEFINE_SPINLOCK(lock); static char line[1024]; struct console *con; @@ -35,8 +36,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, if (!spin_trylock_irqsave(&lock, flags)) return; + kmsg_dump_rewind(&iter); + printf("kmsg_dump:\n"); - while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { + while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) { line[len] = '\0'; printf("%s", line); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 10dce9f91216..b341b144bde8 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1391,6 +1391,7 @@ static void vmbus_isr(void) static void hv_kmsg_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + struct kmsg_dump_iter iter; size_t bytes_written; phys_addr_t panic_pa; @@ -1404,7 +1405,8 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper, * Write dump contents to the page. No need to synchronize; panic should * be single-threaded. */ - kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, + kmsg_dump_rewind(&iter); + kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, &bytes_written); if (bytes_written) hyperv_report_panic_msg(panic_pa, bytes_written); diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 8bbfba40a554..862c4a889234 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -277,14 +277,17 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, { struct mtdoops_context *cxt = container_of(dumper, struct mtdoops_context, dump); + struct kmsg_dump_iter iter; /* Only dump oopses if dump_oops is set */ if (reason == KMSG_DUMP_OOPS && !dump_oops) return; + kmsg_dump_rewind(&iter); + if (test_and_set_bit(0, &cxt->oops_buf_busy)) return; - kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, + kmsg_dump_get_buffer(&iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, record_size - MTDOOPS_HEADER_SIZE, NULL); clear_bit(0, &cxt->oops_buf_busy); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index d963ae7902f9..b9614db48b1d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -385,6 +385,7 @@ void pstore_record_init(struct pstore_record *record, static void pstore_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + struct kmsg_dump_iter iter; unsigned long total = 0; const char *why; unsigned int part = 1; @@ -405,6 +406,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, } } + kmsg_dump_rewind(&iter); + oopscount++; while (total < kmsg_bytes) { char *dst; @@ -435,7 +438,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, dst_size -= header_size; /* Write dump contents. */ - if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, + if (!kmsg_dump_get_buffer(&iter, true, dst + header_size, dst_size, &dump_size)) break; diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 84eaa2090efa..36c8c57e1051 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -29,6 +29,16 @@ enum kmsg_dump_reason { KMSG_DUMP_MAX }; +/** + * struct kmsg_dump_iter - iterator for retrieving kernel messages + * @cur_seq: Points to the oldest message to dump + * @next_seq: Points after the newest message to dump + */ +struct kmsg_dump_iter { + u64 cur_seq; + u64 next_seq; +}; + /** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) @@ -36,35 +46,29 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered - * @cur_seq: Points to the oldest message to dump - * @next_seq: Points after the newest message to dump */ struct kmsg_dumper { struct list_head list; void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); enum kmsg_dump_reason max_reason; bool registered; - - /* private state of the kmsg iterator */ - u64 cur_seq; - u64 next_seq; }; #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); -bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out); -void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); +void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); -void kmsg_dump_rewind(struct kmsg_dumper *dumper); +void kmsg_dump_rewind(struct kmsg_dump_iter *iter); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -76,30 +80,30 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { return false; } -static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { return false; } -static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len) { return false; } -static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) { } -static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 315169d5e119..8544d7a55a57 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const char **argv) int adjust = 0; int n = 0; int skip = 0; - struct kmsg_dumper dumper; + struct kmsg_dump_iter iter; size_t len; char buf[201]; @@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const char **argv) kdb_set(2, setargs); } - kmsg_dump_rewind_nolock(&dumper); - while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) + kmsg_dump_rewind_nolock(&iter); + while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) n++; if (lines < 0) { @@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const char **argv) if (skip >= n || skip < 0) return 0; - kmsg_dump_rewind_nolock(&dumper); - while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { + kmsg_dump_rewind_nolock(&iter); + while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { if (skip) { skip--; continue; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ce4cc64ba7c9..b49dee256947 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3390,7 +3390,6 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); void kmsg_dump(enum kmsg_dump_reason reason) { struct kmsg_dumper *dumper; - unsigned long flags; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -3407,12 +3406,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) if (reason > max_reason) continue; - /* initialize iterator with data about the stored records */ - logbuf_lock_irqsave(flags); - dumper->cur_seq = latched_seq_read_nolock(&clear_seq); - dumper->next_seq = prb_next_seq(prb); - logbuf_unlock_irqrestore(flags); - /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); } @@ -3421,7 +3414,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) /** * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer @@ -3438,24 +3431,28 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len) { + u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; size_t l = 0; bool ret = false; + if (iter->cur_seq < min_seq) + iter->cur_seq = min_seq; + prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? */ if (line) { - if (!prb_read_valid(prb, dumper->cur_seq, &r)) + if (!prb_read_valid(prb, iter->cur_seq, &r)) goto out; l = record_print_text(&r, syslog, printk_time); } else { - if (!prb_read_valid_info(prb, dumper->cur_seq, + if (!prb_read_valid_info(prb, iter->cur_seq, &info, &line_count)) { goto out; } @@ -3464,7 +3461,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, } - dumper->cur_seq = r.info->seq + 1; + iter->cur_seq = r.info->seq + 1; ret = true; out: if (len) @@ -3474,7 +3471,7 @@ out: /** * kmsg_dump_get_line - retrieve one kmsg log line - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer @@ -3489,14 +3486,14 @@ out: * A return value of FALSE indicates that there are no more records to * read. */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len) { unsigned long flags; bool ret; logbuf_lock_irqsave(flags); - ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); + ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); logbuf_unlock_irqrestore(flags); return ret; @@ -3505,7 +3502,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** * kmsg_dump_get_buffer - copy kmsg log lines - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer @@ -3522,9 +3519,10 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); * A return value of FALSE indicates that there are no more records to * read. */ -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out) { + u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; struct printk_record r; unsigned long flags; @@ -3537,16 +3535,19 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, if (!buf || !size) goto out; + if (iter->cur_seq < min_seq) + iter->cur_seq = min_seq; + logbuf_lock_irqsave(flags); - if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { - if (info.seq != dumper->cur_seq) { + if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { + if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ - dumper->cur_seq = info.seq; + iter->cur_seq = info.seq; } } /* last entry */ - if (dumper->cur_seq >= dumper->next_seq) { + if (iter->cur_seq >= iter->next_seq) { logbuf_unlock_irqrestore(flags); goto out; } @@ -3557,7 +3558,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, * because this function (by way of record_print_text()) will * not write more than size-1 bytes of text into @buf. */ - seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, + seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, size - 1, syslog, time); /* @@ -3570,7 +3571,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, len = 0; prb_for_each_record(seq, prb, seq, &r) { - if (r.info->seq >= dumper->next_seq) + if (r.info->seq >= iter->next_seq) break; len += record_print_text(&r, syslog, time); @@ -3579,7 +3580,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, prb_rec_init_rd(&r, &info, buf + len, size - len); } - dumper->next_seq = next_seq; + iter->next_seq = next_seq; ret = true; logbuf_unlock_irqrestore(flags); out: @@ -3591,7 +3592,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple @@ -3599,26 +3600,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); * * The function is similar to kmsg_dump_rewind(), but grabs no locks. */ -void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) { - dumper->cur_seq = latched_seq_read_nolock(&clear_seq); - dumper->next_seq = prb_next_seq(prb); + iter->cur_seq = latched_seq_read_nolock(&clear_seq); + iter->next_seq = prb_next_seq(prb); } /** * kmsg_dump_rewind - reset the iterator - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple * times within the same dumper.dump() callback. */ -void kmsg_dump_rewind(struct kmsg_dumper *dumper) +void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { unsigned long flags; logbuf_lock_irqsave(flags); - kmsg_dump_rewind_nolock(dumper); + kmsg_dump_rewind_nolock(iter); logbuf_unlock_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); From 996e966640ddea7b535cbe7b749e87a3b24f37e8 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:26 +0100 Subject: [PATCH 13/24] printk: remove logbuf_lock Since the ringbuffer is lockless, there is no need for it to be protected by @logbuf_lock. Remove @logbuf_lock. @console_seq, @exclusive_console_stop_seq, @console_dropped are protected by @console_lock. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-14-john.ogness@linutronix.de --- kernel/printk/internal.h | 4 +- kernel/printk/printk.c | 112 ++++++++++++------------------------ kernel/printk/printk_safe.c | 27 +++------ 3 files changed, 46 insertions(+), 97 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 3a8fd491758c..e7acc2888c8e 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -12,8 +12,6 @@ #define PRINTK_NMI_CONTEXT_OFFSET 0x010000000 -extern raw_spinlock_t logbuf_lock; - __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, @@ -59,7 +57,7 @@ void defer_console_output(void); __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } /* - * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem + * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so * printk-safe must preserve the existing local IRQ guarantees. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b49dee256947..8994bc192b88 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -355,41 +355,6 @@ enum log_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; -/* - * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken - * within the scheduler's rq lock. It must be released before calling - * console_unlock() or anything else that might wake up a process. - */ -DEFINE_RAW_SPINLOCK(logbuf_lock); - -/* - * Helper macros to lock/unlock logbuf_lock and switch between - * printk-safe/unsafe modes. - */ -#define logbuf_lock_irq() \ - do { \ - printk_safe_enter_irq(); \ - raw_spin_lock(&logbuf_lock); \ - } while (0) - -#define logbuf_unlock_irq() \ - do { \ - raw_spin_unlock(&logbuf_lock); \ - printk_safe_exit_irq(); \ - } while (0) - -#define logbuf_lock_irqsave(flags) \ - do { \ - printk_safe_enter_irqsave(flags); \ - raw_spin_lock(&logbuf_lock); \ - } while (0) - -#define logbuf_unlock_irqrestore(flags) \ - do { \ - raw_spin_unlock(&logbuf_lock); \ - printk_safe_exit_irqrestore(flags); \ - } while (0) - /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_RAW_SPINLOCK(syslog_lock); @@ -401,6 +366,7 @@ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; +/* All 3 protected by @console_sem. */ /* the next printk record to write to the console */ static u64 console_seq; static u64 exclusive_console_stop_seq; @@ -766,27 +732,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; - logbuf_lock_irq(); + printk_safe_enter_irq(); if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; - logbuf_unlock_irq(); + printk_safe_exit_irq(); goto out; } - logbuf_unlock_irq(); + printk_safe_exit_irq(); ret = wait_event_interruptible(log_wait, prb_read_valid(prb, atomic64_read(&user->seq), r)); if (ret) goto out; - logbuf_lock_irq(); + printk_safe_enter_irq(); } if (r->info->seq != atomic64_read(&user->seq)) { /* our last seen message is gone, return error and reset */ atomic64_set(&user->seq, r->info->seq); ret = -EPIPE; - logbuf_unlock_irq(); + printk_safe_exit_irq(); goto out; } @@ -796,7 +762,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, &r->info->dev_info); atomic64_set(&user->seq, r->info->seq + 1); - logbuf_unlock_irq(); + printk_safe_exit_irq(); if (len > count) { ret = -EINVAL; @@ -831,7 +797,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) if (offset) return -ESPIPE; - logbuf_lock_irq(); + printk_safe_enter_irq(); switch (whence) { case SEEK_SET: /* the first record */ @@ -852,7 +818,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) default: ret = -EINVAL; } - logbuf_unlock_irq(); + printk_safe_exit_irq(); return ret; } @@ -867,7 +833,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); - logbuf_lock_irq(); + printk_safe_enter_irq(); if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ if (info.seq != atomic64_read(&user->seq)) @@ -875,7 +841,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) else ret = EPOLLIN|EPOLLRDNORM; } - logbuf_unlock_irq(); + printk_safe_exit_irq(); return ret; } @@ -908,9 +874,9 @@ static int devkmsg_open(struct inode *inode, struct file *file) prb_rec_init_rd(&user->record, &user->info, &user->text_buf[0], sizeof(user->text_buf)); - logbuf_lock_irq(); + printk_safe_enter_irq(); atomic64_set(&user->seq, prb_first_valid_seq(prb)); - logbuf_unlock_irq(); + printk_safe_exit_irq(); file->private_data = user; return 0; @@ -1532,11 +1498,11 @@ static int syslog_print(char __user *buf, int size) size_t n; size_t skip; - logbuf_lock_irq(); + printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); if (!prb_read_valid(prb, syslog_seq, &r)) { raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); + printk_safe_exit_irq(); break; } if (r.info->seq != syslog_seq) { @@ -1566,7 +1532,7 @@ static int syslog_print(char __user *buf, int size) } else n = 0; raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); + printk_safe_exit_irq(); if (!n) break; @@ -1600,7 +1566,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; time = printk_time; - logbuf_lock_irq(); + printk_safe_enter_irq(); /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. @@ -1621,12 +1587,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear) break; } - logbuf_unlock_irq(); + printk_safe_exit_irq(); if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; - logbuf_lock_irq(); + printk_safe_enter_irq(); if (len < 0) break; @@ -1637,7 +1603,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) latched_seq_write(&clear_seq, seq); raw_spin_unlock(&syslog_lock); } - logbuf_unlock_irq(); + printk_safe_exit_irq(); kfree(text); return len; @@ -1645,11 +1611,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { - logbuf_lock_irq(); + printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); latched_seq_write(&clear_seq, prb_next_seq(prb)); raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); + printk_safe_exit_irq(); } /* Return a consistent copy of @syslog_seq. */ @@ -1737,12 +1703,12 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - logbuf_lock_irq(); + printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { /* No unread messages. */ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); + printk_safe_exit_irq(); return 0; } if (info.seq != syslog_seq) { @@ -1771,7 +1737,7 @@ int do_syslog(int type, char __user *buf, int len, int source) error -= syslog_partial; } raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); + printk_safe_exit_irq(); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -2627,7 +2593,6 @@ again: size_t len; printk_safe_enter_irqsave(flags); - raw_spin_lock(&logbuf_lock); skip: if (!prb_read_valid(prb, console_seq, &r)) break; @@ -2671,7 +2636,6 @@ skip: console_msg_format & MSG_FORMAT_SYSLOG, printk_time); console_seq++; - raw_spin_unlock(&logbuf_lock); /* * While actively printing out messages, if another printk() @@ -2698,8 +2662,6 @@ skip: console_locked = 0; - raw_spin_unlock(&logbuf_lock); - up_console_sem(); /* @@ -2708,9 +2670,7 @@ skip: * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ - raw_spin_lock(&logbuf_lock); retry = prb_read_valid(prb, console_seq, NULL); - raw_spin_unlock(&logbuf_lock); printk_safe_exit_irqrestore(flags); if (retry && console_trylock()) @@ -2777,9 +2737,9 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) { unsigned long flags; - logbuf_lock_irqsave(flags); + printk_safe_enter_irqsave(flags); console_seq = prb_first_valid_seq(prb); - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); } console_unlock(); } @@ -3008,7 +2968,7 @@ void register_console(struct console *newcon) * console_unlock(); will print out the buffered messages * for us. */ - logbuf_lock_irqsave(flags); + printk_safe_enter_irqsave(flags); /* * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to @@ -3026,7 +2986,7 @@ void register_console(struct console *newcon) console_seq = syslog_seq; raw_spin_unlock(&syslog_lock); - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); } console_unlock(); console_sysfs_notify(); @@ -3492,9 +3452,9 @@ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, unsigned long flags; bool ret; - logbuf_lock_irqsave(flags); + printk_safe_enter_irqsave(flags); ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); return ret; } @@ -3538,7 +3498,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; - logbuf_lock_irqsave(flags); + printk_safe_enter_irqsave(flags); if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ @@ -3548,7 +3508,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, /* last entry */ if (iter->cur_seq >= iter->next_seq) { - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); goto out; } @@ -3582,7 +3542,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, iter->next_seq = next_seq; ret = true; - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); out: if (len_out) *len_out = len; @@ -3618,9 +3578,9 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { unsigned long flags; - logbuf_lock_irqsave(flags); + printk_safe_enter_irqsave(flags); kmsg_dump_rewind_nolock(iter); - logbuf_unlock_irqrestore(flags); + printk_safe_exit_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 2e9e3ed7d63e..bbf5c1993636 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -16,7 +16,7 @@ #include "internal.h" /* - * printk() could not take logbuf_lock in NMI context. Instead, + * In NMI and safe mode, printk() avoids taking locks. Instead, * it uses an alternative implementation that temporary stores * the strings into a per-CPU buffer. The content of the buffer * is later flushed into the main ring buffer via IRQ work. @@ -267,17 +267,9 @@ void printk_safe_flush(void) void printk_safe_flush_on_panic(void) { /* - * Make sure that we could access the main ring buffer. + * Make sure that we could access the safe buffers. * Do not risk a double release when more CPUs are up. */ - if (raw_spin_is_locked(&logbuf_lock)) { - if (num_online_cpus() > 1) - return; - - debug_locks_off(); - raw_spin_lock_init(&logbuf_lock); - } - if (raw_spin_is_locked(&safe_read_lock)) { if (num_online_cpus() > 1) return; @@ -319,9 +311,7 @@ void noinstr printk_nmi_exit(void) * reordering. * * It has effect only when called in NMI context. Then printk() - * will try to store the messages into the main logbuf directly - * and use the per-CPU buffers only as a fallback when the lock - * is not available. + * will store the messages into the main logbuf directly. */ void printk_nmi_direct_enter(void) { @@ -376,20 +366,21 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) #endif /* - * Try to use the main logbuf even in NMI. But avoid calling console + * Use the main logbuf even in NMI. But avoid calling console * drivers that might have their own locks. */ - if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && - raw_spin_trylock(&logbuf_lock)) { + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) { + unsigned long flags; int len; + printk_safe_enter_irqsave(flags); len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); - raw_spin_unlock(&logbuf_lock); + printk_safe_exit_irqrestore(flags); defer_console_output(); return len; } - /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ + /* Use extra buffer in NMI. */ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) return vprintk_nmi(fmt, args); From a4f987653241db9fa1f99531b430cebb83f1eae1 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:27 +0100 Subject: [PATCH 14/24] printk: kmsg_dump: remove _nolock() variants kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is no need for _nolock() variants. Remove these functions and switch all callers of the _nolock() variants. The functions without _nolock() were chosen because they are already exported to kernel modules. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-15-john.ogness@linutronix.de --- arch/powerpc/xmon/xmon.c | 4 +-- include/linux/kmsg_dump.h | 16 ---------- kernel/debug/kdb/kdb_main.c | 8 ++--- kernel/printk/printk.c | 60 +++++-------------------------------- 4 files changed, 14 insertions(+), 74 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dbcf403b1d8d..dd0e0aac954e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3017,9 +3017,9 @@ dump_log_buf(void) catch_memory_errors = 1; sync(); - kmsg_dump_rewind_nolock(&iter); + kmsg_dump_rewind(&iter); xmon_start_pagination(); - while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { + while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) { buf[len] = '\0'; printf("%s", buf); } diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 36c8c57e1051..906521c2329c 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -57,17 +57,12 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); -bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len); - bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out); -void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); - void kmsg_dump_rewind(struct kmsg_dump_iter *iter); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -80,13 +75,6 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, - bool syslog, const char *line, - size_t size, size_t *len) -{ - return false; -} - static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { @@ -99,10 +87,6 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog return false; } -static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) -{ -} - static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 8544d7a55a57..67d9f2403b52 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const char **argv) kdb_set(2, setargs); } - kmsg_dump_rewind_nolock(&iter); - while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) + kmsg_dump_rewind(&iter); + while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL)) n++; if (lines < 0) { @@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const char **argv) if (skip >= n || skip < 0) return 0; - kmsg_dump_rewind_nolock(&iter); - while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { + kmsg_dump_rewind(&iter); + while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) { if (skip) { skip--; continue; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8994bc192b88..602de86d4e76 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3373,7 +3373,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) } /** - * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) + * kmsg_dump_get_line - retrieve one kmsg log line * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to @@ -3388,22 +3388,22 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * A return value of FALSE indicates that there are no more records to * read. - * - * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len) +bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, + char *line, size_t size, size_t *len) { u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; + unsigned long flags; size_t l = 0; bool ret = false; if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; + printk_safe_enter_irqsave(flags); prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? */ @@ -3424,40 +3424,11 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, iter->cur_seq = r.info->seq + 1; ret = true; out: + printk_safe_exit_irqrestore(flags); if (len) *len = l; return ret; } - -/** - * kmsg_dump_get_line - retrieve one kmsg log line - * @iter: kmsg dump iterator - * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to - * @size: maximum size of the buffer - * @len: length of line placed into buffer - * - * Start at the beginning of the kmsg buffer, with the oldest kmsg - * record, and copy one record into the provided buffer. - * - * Consecutive calls will return the next available record moving - * towards the end of the buffer with the youngest messages. - * - * A return value of FALSE indicates that there are no more records to - * read. - */ -bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len) -{ - unsigned long flags; - bool ret; - - printk_safe_enter_irqsave(flags); - ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); - printk_safe_exit_irqrestore(flags); - - return ret; -} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** @@ -3550,22 +3521,6 @@ out: } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); -/** - * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) - * @iter: kmsg dump iterator - * - * Reset the dumper's iterator so that kmsg_dump_get_line() and - * kmsg_dump_get_buffer() can be called again and used multiple - * times within the same dumper.dump() callback. - * - * The function is similar to kmsg_dump_rewind(), but grabs no locks. - */ -void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) -{ - iter->cur_seq = latched_seq_read_nolock(&clear_seq); - iter->next_seq = prb_next_seq(prb); -} - /** * kmsg_dump_rewind - reset the iterator * @iter: kmsg dump iterator @@ -3579,7 +3534,8 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) unsigned long flags; printk_safe_enter_irqsave(flags); - kmsg_dump_rewind_nolock(iter); + iter->cur_seq = latched_seq_read_nolock(&clear_seq); + iter->next_seq = prb_next_seq(prb); printk_safe_exit_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); From 505a27a7342912f25ef3d9dabd6a08857e76efc2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:28 +0100 Subject: [PATCH 15/24] printk: console: remove unnecessary safe buffer usage Upon registering a console, safe buffers are activated when setting up the sequence number to replay the log. However, these are already protected by @console_sem and @syslog_lock. Remove the unnecessary safe buffer usage. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-16-john.ogness@linutronix.de --- kernel/printk/printk.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 602de86d4e76..2f829fbf0a13 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2967,9 +2967,7 @@ void register_console(struct console *newcon) /* * console_unlock(); will print out the buffered messages * for us. - */ - printk_safe_enter_irqsave(flags); - /* + * * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to * the already-registered consoles. @@ -2982,11 +2980,9 @@ void register_console(struct console *newcon) exclusive_console_stop_seq = console_seq; /* Get a consistent copy of @syslog_seq. */ - raw_spin_lock(&syslog_lock); + raw_spin_lock_irqsave(&syslog_lock, flags); console_seq = syslog_seq; - raw_spin_unlock(&syslog_lock); - - printk_safe_exit_irqrestore(flags); + raw_spin_unlock_irqrestore(&syslog_lock, flags); } console_unlock(); console_sysfs_notify(); From 9f961c2e08741579aa53095d0dbffbcb25a9ae66 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 5 Mar 2021 20:42:05 +0100 Subject: [PATCH 16/24] lib/vsprintf: do not show no_hash_pointers message multiple times Do not show no_hash_pointers message multiple times if the option was passed more than once (e.g. via generated command line). Signed-off-by: Marco Elver Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210305194206.3165917-1-elver@google.com --- lib/vsprintf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 41ddc353ebb8..4a14889ccb35 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2096,6 +2096,9 @@ EXPORT_SYMBOL_GPL(no_hash_pointers); static int __init no_hash_pointers_enable(char *str) { + if (no_hash_pointers) + return 0; + no_hash_pointers = true; pr_warn("**********************************************************\n"); From ea35d8677811296730e762a2888cda3f01d13a89 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 19 Mar 2021 14:45:08 +0900 Subject: [PATCH 17/24] MAINTAINERS: update Senozhatsky email address I don't check my @gmail.com addresses often enough these days. Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210319054508.124762-1-senozhatsky@chromium.org --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a9efff1a18c6..e626953f5ed8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14319,7 +14319,7 @@ F: kernel/sched/psi.c PRINTK M: Petr Mladek -M: Sergey Senozhatsky +M: Sergey Senozhatsky R: Steven Rostedt R: John Ogness S: Maintained @@ -19156,7 +19156,7 @@ F: drivers/net/vrf.c VSPRINTF M: Petr Mladek M: Steven Rostedt -M: Sergey Senozhatsky +M: Sergey Senozhatsky R: Andy Shevchenko R: Rasmus Villemoes S: Maintained @@ -19807,7 +19807,7 @@ F: drivers/staging/media/zoran/ ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim M: Nitin Gupta -R: Sergey Senozhatsky +R: Sergey Senozhatsky L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/admin-guide/blockdev/zram.rst @@ -19821,7 +19821,7 @@ F: drivers/tty/serial/zs.* ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR M: Minchan Kim M: Nitin Gupta -R: Sergey Senozhatsky +R: Sergey Senozhatsky L: linux-mm@kvack.org S: Maintained F: Documentation/vm/zsmalloc.rst From 4a8ef190c16f724a0bc23ef71bda44b8aef4afaa Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 19 Mar 2021 18:12:44 +0800 Subject: [PATCH 18/24] mm, slub: use pGp to print page flags As pGp has been already introduced in printk, we'd better use it to make the output human readable. Before this change, the output is, [ 6155.716018] INFO: Slab 0x000000004027dd4f objects=33 used=3 fp=0x000000008cd1579c flags=0x17ffffc0010200 While after this change, the output is, [ 8846.517809] INFO: Slab 0x00000000f42a2c60 objects=33 used=3 fp=0x0000000060d32ca8 flags=0x17ffffc0010200(slab|head) Signed-off-by: Yafang Shao Reviewed-by: David Hildenbrand Reviewed-by: Vlastimil Babka Acked-by: David Rientjes Acked-by: Christoph Lameter Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Miaohe Lin Reviewed-by: Andy Shevchenko Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210319101246.73513-2-laoar.shao@gmail.com --- mm/slub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index f5baf429654f..91eea1930cc1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -641,8 +641,9 @@ void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", - page, page->objects, page->inuse, page->freelist, page->flags); + pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n", + page, page->objects, page->inuse, page->freelist, + page->flags, &page->flags); } From 96b94abc12e6ec648f770776522de20c073c6eca Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 19 Mar 2021 18:12:45 +0800 Subject: [PATCH 19/24] mm, slub: don't combine pr_err with INFO It is strange to combine "pr_err" with "INFO", so let's remove the prefix completely. This patch is motivated by David's comment[1]. - before the patch [ 8846.517809] INFO: Slab 0x00000000f42a2c60 objects=33 used=3 fp=0x0000000060d32ca8 flags=0x17ffffc0010200(slab|head) - after the patch [ 6343.396602] Slab 0x000000004382e02b objects=33 used=3 fp=0x000000009ae06ffc flags=0x17ffffc0010200(slab|head) [1] https://lore.kernel.org/linux-mm/b9c0f2b6-e9b0-0c36-ebdd-2bc684c5a762@redhat.com/#t Suggested-by: Vlastimil Babka Signed-off-by: Yafang Shao Acked-by: Vlastimil Babka Reviewed-by: Miaohe Lin Reviewed-by: Andy Shevchenko Reviewed-by: David Hildenbrand Acked-by: David Rientjes Cc: Matthew Wilcox Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210319101246.73513-3-laoar.shao@gmail.com --- mm/slub.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 91eea1930cc1..5eb120fdde8b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -615,7 +615,7 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time) if (!t->addr) return; - pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n", + pr_err("%s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid); #ifdef CONFIG_STACKTRACE { @@ -641,7 +641,7 @@ void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n", + pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n", page, page->objects, page->inuse, page->freelist, page->flags, &page->flags); @@ -698,7 +698,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) print_page_info(page); - pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", + pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); if (s->flags & SLAB_RED_ZONE) @@ -791,7 +791,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, end--; slab_bug(s, "%s overwritten", what); - pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", + pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", fault, end - 1, fault - addr, fault[0], value); print_trailer(s, page, object); @@ -3869,7 +3869,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, for_each_object(p, s, addr, page->objects) { if (!test_bit(__obj_to_index(s, addr, p), map)) { - pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr); + pr_err("Object 0x%p @offset=%tu\n", p, p - addr); print_tracking(s, p); } } From c244297acbe51f1db5764966c02cdbd69927f218 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 19 Mar 2021 18:12:46 +0800 Subject: [PATCH 20/24] vsprintf: dump full information of page flags in pGp Currently the pGp only shows the names of page flags, rather than the full information including section, node, zone, last cpupid and kasan tag. While it is not easy to parse these information manually because there're so many flavors. Let's interpret them in pGp as well. To be compitable with the existed format of pGp, the new introduced ones also use '|' as the separator, then the user tools parsing pGp won't need to make change, suggested by Matthew. The new information is tracked onto the end of the existed one. On example of the output in mm/slub.c as follows, - Before the patch, [ 6343.396602] Slab 0x000000004382e02b objects=33 used=3 fp=0x000000009ae06ffc flags=0x17ffffc0010200(slab|head) - After the patch, [ 8448.272530] Slab 0x0000000090797883 objects=33 used=3 fp=0x00000000790f1c26 flags=0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) The documentation and test cases are also updated. The output of the test cases as follows, [68599.816764] test_printf: loaded. [68599.819068] test_printf: all 388 tests passed [68599.830367] test_printf: unloaded. [lkp@intel.com: reported issues in the prev version in test_printf.c] Signed-off-by: Yafang Shao Cc: David Hildenbrand Cc: Joe Perches Cc: Miaohe Lin Cc: Vlastimil Babka Cc: Andy Shevchenko Cc: Matthew Wilcox Cc: Petr Mladek Cc: kernel test robot Reviewed-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210319101246.73513-4-laoar.shao@gmail.com --- Documentation/core-api/printk-formats.rst | 2 +- lib/test_printf.c | 90 ++++++++++++++++++++--- lib/vsprintf.c | 66 +++++++++++++++-- 3 files changed, 142 insertions(+), 16 deletions(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 160e710d992f..00d07c7eefd4 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -540,7 +540,7 @@ Flags bitfields such as page flags, gfp_flags :: - %pGp referenced|uptodate|lru|active|private + %pGp referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff %pGg GFP_USER|GFP_DMA32|GFP_NOWARN %pGv read|exec|mayread|maywrite|mayexec|denywrite diff --git a/lib/test_printf.c b/lib/test_printf.c index 95a2f82427c7..27b964ec723d 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -577,24 +577,98 @@ netdev_features(void) { } +struct page_flags_test { + int width; + int shift; + int mask; + unsigned long value; + const char *fmt; + const char *name; +}; + +static struct page_flags_test pft[] = { + {SECTIONS_WIDTH, SECTIONS_PGSHIFT, SECTIONS_MASK, + 0, "%d", "section"}, + {NODES_WIDTH, NODES_PGSHIFT, NODES_MASK, + 0, "%d", "node"}, + {ZONES_WIDTH, ZONES_PGSHIFT, ZONES_MASK, + 0, "%d", "zone"}, + {LAST_CPUPID_WIDTH, LAST_CPUPID_PGSHIFT, LAST_CPUPID_MASK, + 0, "%#x", "lastcpupid"}, + {KASAN_TAG_WIDTH, KASAN_TAG_PGSHIFT, KASAN_TAG_MASK, + 0, "%#x", "kasantag"}, +}; + +static void __init +page_flags_test(int section, int node, int zone, int last_cpupid, + int kasan_tag, int flags, const char *name, char *cmp_buf) +{ + unsigned long values[] = {section, node, zone, last_cpupid, kasan_tag}; + unsigned long page_flags = 0; + unsigned long size = 0; + bool append = false; + int i; + + flags &= BIT(NR_PAGEFLAGS) - 1; + if (flags) { + page_flags |= flags; + snprintf(cmp_buf + size, BUF_SIZE - size, "%s", name); + size = strlen(cmp_buf); +#if SECTIONS_WIDTH || NODES_WIDTH || ZONES_WIDTH || \ + LAST_CPUPID_WIDTH || KASAN_TAG_WIDTH + /* Other information also included in page flags */ + snprintf(cmp_buf + size, BUF_SIZE - size, "|"); + size = strlen(cmp_buf); +#endif + } + + /* Set the test value */ + for (i = 0; i < ARRAY_SIZE(pft); i++) + pft[i].value = values[i]; + + for (i = 0; i < ARRAY_SIZE(pft); i++) { + if (!pft[i].width) + continue; + + if (append) { + snprintf(cmp_buf + size, BUF_SIZE - size, "|"); + size = strlen(cmp_buf); + } + + page_flags |= (pft[i].value & pft[i].mask) << pft[i].shift; + snprintf(cmp_buf + size, BUF_SIZE - size, "%s=", pft[i].name); + size = strlen(cmp_buf); + snprintf(cmp_buf + size, BUF_SIZE - size, pft[i].fmt, + pft[i].value & pft[i].mask); + size = strlen(cmp_buf); + append = true; + } + + test(cmp_buf, "%pGp", &page_flags); +} + static void __init flags(void) { unsigned long flags; - gfp_t gfp; char *cmp_buffer; + gfp_t gfp; + + cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); + if (!cmp_buffer) + return; flags = 0; - test("", "%pGp", &flags); + page_flags_test(0, 0, 0, 0, 0, flags, "", cmp_buffer); - /* Page flags should filter the zone id */ flags = 1UL << NR_PAGEFLAGS; - test("", "%pGp", &flags); + page_flags_test(0, 0, 0, 0, 0, flags, "", cmp_buffer); flags |= 1UL << PG_uptodate | 1UL << PG_dirty | 1UL << PG_lru | 1UL << PG_active | 1UL << PG_swapbacked; - test("uptodate|dirty|lru|active|swapbacked", "%pGp", &flags); - + page_flags_test(1, 1, 1, 0x1fffff, 1, flags, + "uptodate|dirty|lru|active|swapbacked", + cmp_buffer); flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_DENYWRITE; @@ -609,10 +683,6 @@ flags(void) gfp = __GFP_ATOMIC; test("__GFP_ATOMIC", "%pGg", &gfp); - cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); - if (!cmp_buffer) - return; - /* Any flags not translated by the table should remain numeric */ gfp = ~__GFP_BITS_MASK; snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 41ddc353ebb8..92e6085eef15 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1916,6 +1916,66 @@ char *format_flags(char *buf, char *end, unsigned long flags, return buf; } +struct page_flags_fields { + int width; + int shift; + int mask; + const struct printf_spec *spec; + const char *name; +}; + +static const struct page_flags_fields pff[] = { + {SECTIONS_WIDTH, SECTIONS_PGSHIFT, SECTIONS_MASK, + &default_dec_spec, "section"}, + {NODES_WIDTH, NODES_PGSHIFT, NODES_MASK, + &default_dec_spec, "node"}, + {ZONES_WIDTH, ZONES_PGSHIFT, ZONES_MASK, + &default_dec_spec, "zone"}, + {LAST_CPUPID_WIDTH, LAST_CPUPID_PGSHIFT, LAST_CPUPID_MASK, + &default_flag_spec, "lastcpupid"}, + {KASAN_TAG_WIDTH, KASAN_TAG_PGSHIFT, KASAN_TAG_MASK, + &default_flag_spec, "kasantag"}, +}; + +static +char *format_page_flags(char *buf, char *end, unsigned long flags) +{ + unsigned long main_flags = flags & (BIT(NR_PAGEFLAGS) - 1); + bool append = false; + int i; + + /* Page flags from the main area. */ + if (main_flags) { + buf = format_flags(buf, end, main_flags, pageflag_names); + append = true; + } + + /* Page flags from the fields area */ + for (i = 0; i < ARRAY_SIZE(pff); i++) { + /* Skip undefined fields. */ + if (!pff[i].width) + continue; + + /* Format: Flag Name + '=' (equals sign) + Number + '|' (separator) */ + if (append) { + if (buf < end) + *buf = '|'; + buf++; + } + + buf = string(buf, end, pff[i].name, default_str_spec); + if (buf < end) + *buf = '='; + buf++; + buf = number(buf, end, (flags >> pff[i].shift) & pff[i].mask, + *pff[i].spec); + + append = true; + } + + return buf; +} + static noinline_for_stack char *flags_string(char *buf, char *end, void *flags_ptr, struct printf_spec spec, const char *fmt) @@ -1928,11 +1988,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, switch (fmt[1]) { case 'p': - flags = *(unsigned long *)flags_ptr; - /* Remove zone id */ - flags &= (1UL << NR_PAGEFLAGS) - 1; - names = pageflag_names; - break; + return format_page_flags(buf, end, *(unsigned long *)flags_ptr); case 'v': flags = *(unsigned long *)flags_ptr; names = vmaflag_names; From 28e1745b9fa23f71f465f6b65f966a1ef65af517 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 23 Mar 2021 15:42:01 +0100 Subject: [PATCH 21/24] printk: rename vprintk_func to vprintk The printk code is already hard enough to understand. Remove an unnecessary indirection by renaming vprintk_func to vprintk (adding the asmlinkage annotation), and removing the vprintk definition from printk.c. That way, printk is implemented in terms of vprintk as one would expect, and there's no "vprintk_func, what's that? Some function pointer that gets set where?" The declaration of vprintk in linux/printk.h already has the __printf(1,0) attribute, there's no point repeating that with the definition - it's for diagnostics in callers. linux/printk.h already contains a static inline {return 0;} definition of vprintk when !CONFIG_PRINTK. Since the corresponding stub definition of vprintk_func was not marked "static inline", any translation unit including internal.h would get a definition of vprintk_func - it just so happens that for !CONFIG_PRINTK, there is precisely one such TU, namely printk.c. Had there been more, it would be a link error; now it's just a silly waste of a few bytes of .text, which one must assume are rather precious to anyone disabling PRINTK. $ objdump -dr kernel/printk/printk.o 00000330 : 330: 31 c0 xor %eax,%eax 332: c3 ret 333: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 33a: 8d b6 00 00 00 00 lea 0x0(%esi),%esi Signed-off-by: Rasmus Villemoes Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210323144201.486050-1-linux@rasmusvillemoes.dk --- kernel/printk/internal.h | 3 --- kernel/printk/printk.c | 8 +------- kernel/printk/printk_safe.c | 3 ++- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index e7acc2888c8e..51615c909b2f 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -19,7 +19,6 @@ int vprintk_store(int facility, int level, __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); -__printf(1, 0) int vprintk_func(const char *fmt, va_list args); void __printk_safe_enter(void); void __printk_safe_exit(void); @@ -54,8 +53,6 @@ void defer_console_output(void); #else -__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } - /* * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2f829fbf0a13..45ae60f3e489 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2180,12 +2180,6 @@ asmlinkage int vprintk_emit(int facility, int level, } EXPORT_SYMBOL(vprintk_emit); -asmlinkage int vprintk(const char *fmt, va_list args) -{ - return vprintk_func(fmt, args); -} -EXPORT_SYMBOL(vprintk); - int vprintk_default(const char *fmt, va_list args) { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); @@ -2219,7 +2213,7 @@ asmlinkage __visible int printk(const char *fmt, ...) int r; va_start(args, fmt); - r = vprintk_func(fmt, args); + r = vprintk(fmt, args); va_end(args); return r; diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index bbf5c1993636..7a1414622051 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -357,7 +357,7 @@ void __printk_safe_exit(void) this_cpu_dec(printk_context); } -__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB /* Allow to pass printk() to kdb but avoid a recursion. */ @@ -411,3 +411,4 @@ void __init printk_safe_init(void) /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); } +EXPORT_SYMBOL(vprintk); From acebb5597ff182e52a225400a459052a70dae706 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Sun, 28 Mar 2021 10:09:32 +0530 Subject: [PATCH 22/24] kernel/printk.c: Fixed mundane typos s/sempahore/semaphore/ s/exacly/exactly/ s/unregistred/unregistered/ s/interation/iteration/ Signed-off-by: Bhaskar Chowdhury Reviewed-by: Petr Mladek [pmladek@suse.com: Removed 4th hunk. The string has already been removed in the meantime.] Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210328043932.8310-1-unixbhaskar@gmail.com --- kernel/printk/printk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 45ae60f3e489..421c35571797 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -262,7 +262,7 @@ static void __up_console_sem(unsigned long ip) * definitely not the perfect debug tool (we don't know if _WE_ * hold it and are racing, but it helps tracking those weird code * paths in the console code where we end up in places I want - * locked without the console sempahore held). + * locked without the console semaphore held). */ static int console_locked, console_suspended; @@ -2331,7 +2331,7 @@ static int __init console_setup(char *str) /* * console="" or console=null have been suggested as a way to * disable console output. Use ttynull that has been created - * for exacly this purpose. + * for exactly this purpose. */ if (str[0] == 0 || strcmp(str, "null") == 0) { __add_preferred_console("ttynull", 0, NULL, NULL, true); @@ -3106,7 +3106,7 @@ void __init console_init(void) * * To mitigate this problem somewhat, only unregister consoles whose memory * intersects with the init section. Note that all other boot consoles will - * get unregistred when the real preferred console is registered. + * get unregistered when the real preferred console is registered. */ static int __init printk_late_init(void) { From a48849e2358ecf1a347a03b33dc27b9b2f25f8fd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 25 Feb 2021 17:46:39 +0100 Subject: [PATCH 23/24] printk: clarify the documentation for plain pointer printing We have several modifiers for plain pointers (%p, %px and %pK) and now also the no_hash_pointers boot parameter. The documentation should help to choose which variant to use. Importantly, we should discourage %px in favor of %p (with the new boot parameter when debugging), and stress that %pK should be only used for procfs and similar files, not dmesg buffer. This patch clarifies the documentation in that regard. Signed-off-by: Vlastimil Babka Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210225164639.27212-1-vbabka@suse.cz --- Documentation/core-api/printk-formats.rst | 26 ++++++++++++++++++++++- lib/vsprintf.c | 7 ++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 160e710d992f..6724adf58082 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -79,7 +79,19 @@ Pointers printed without a specifier extension (i.e unadorned %p) are hashed to prevent leaking information about the kernel memory layout. This has the added benefit of providing a unique identifier. On 64-bit machines the first 32 bits are zeroed. The kernel will print ``(ptrval)`` until it -gathers enough entropy. If you *really* want the address see %px below. +gathers enough entropy. + +When possible, use specialised modifiers such as %pS or %pB (described below) +to avoid the need of providing an unhashed address that has to be interpreted +post-hoc. If not possible, and the aim of printing the address is to provide +more information for debugging, use %p and boot the kernel with the +``no_hash_pointers`` parameter during debugging, which will print all %p +addresses unmodified. If you *really* always want the unmodified address, see +%px below. + +If (and only if) you are printing addresses as a content of a virtual file in +e.g. procfs or sysfs (using e.g. seq_printf(), not printk()) read by a +userspace process, use the %pK modifier described below instead of %p or %px. Error Pointers -------------- @@ -139,6 +151,11 @@ For printing kernel pointers which should be hidden from unprivileged users. The behaviour of %pK depends on the kptr_restrict sysctl - see Documentation/admin-guide/sysctl/kernel.rst for more details. +This modifier is *only* intended when producing content of a file read by +userspace from e.g. procfs or sysfs, not for dmesg. Please refer to the +section about %p above for discussion about how to manage hashing pointers +in printk(). + Unmodified Addresses -------------------- @@ -153,6 +170,13 @@ equivalent to %lx (or %lu). %px is preferred because it is more uniquely grep'able. If in the future we need to modify the way the kernel handles printing pointers we will be better equipped to find the call sites. +Before using %px, consider if using %p is sufficient together with enabling the +``no_hash_pointers`` kernel parameter during debugging sessions (see the %p +description above). One valid scenario for %px might be printing information +immediately before a panic, which prevents any sensitive information to be +exploited anyway, and with %px there would be no need to reproduce the panic +with no_hash_pointers. + Pointer Differences ------------------- diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4a14889ccb35..5ec8ad238d03 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2189,7 +2189,9 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * Implements a "recursive vsnprintf". * Do not use this feature without some mechanism to verify the * correctness of the format string and va_list arguments. - * - 'K' For a kernel pointer that should be hidden from unprivileged users + * - 'K' For a kernel pointer that should be hidden from unprivileged users. + * Use only for procfs, sysfs and similar files, not printk(); please + * read the documentation (path below) first. * - 'NF' For a netdev_features_t * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with * a certain separator (' ' by default): @@ -2228,7 +2230,8 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * Without an option prints the full name of the node * f full name * P node name, including a possible unit address - * - 'x' For printing the address. Equivalent to "%lx". + * - 'x' For printing the address unmodified. Equivalent to "%lx". + * Please read the documentation (path below) before using! * - '[ku]s' For a BPF/tracing related format specifier, e.g. used out of * bpf_trace_printk() where [ku] prefix specifies either kernel (k) * or user (u) memory to probe, and: From 84696cfaf4d90945eb2a8302edc6cf627db56b84 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 23 Apr 2021 11:45:29 +0200 Subject: [PATCH 24/24] lib/vsprintf.c: remove leftover 'f' and 'F' cases from bstr_printf() Commit 9af7706492f9 ("lib/vsprintf: Remove support for %pF and %pf in favour of %pS and %ps") removed support for %pF and %pf, and correctly removed the handling of those cases in vbin_printf(). However, the corresponding cases in bstr_printf() were left behind. In the same series, %pf was re-purposed for dealing with fwnodes (3bd32d6a2ee6, "lib/vsprintf: Add %pfw conversion specifier for printing fwnode names"). So should anyone use %pf with the binary printf routines, vbin_printf() would (correctly, as it involves dereferencing the pointer) do the string formatting to the u32 array, but bstr_printf() would not copy the string from the u32 array, but instead interpret the first sizeof(void*) bytes of the formatted string as a pointer - which generally won't end well (also, all subsequent get_args would be out of sync). Fixes: 9af7706492f9 ("lib/vsprintf: Remove support for %pF and %pf in favour of %pS and %ps") Cc: stable@vger.kernel.org Signed-off-by: Rasmus Villemoes Reviewed-by: Sakari Ailus Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210423094529.1862521-1-linux@rasmusvillemoes.dk --- lib/vsprintf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 5ec8ad238d03..7caf619ee9c2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -3141,8 +3141,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) switch (*fmt) { case 'S': case 's': - case 'F': - case 'f': case 'x': case 'K': case 'e':