From de462e5f10718517bacf2f84c8aa2804567ef7df Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 26 Apr 2020 15:53:30 +0900 Subject: [PATCH 01/10] bootconfig: Fix to remove bootconfig data from initrd while boot If there is bootconfig data at the tail of initrd/initramfs, the initrd image sanity check causes an error during the decompression stage as follows. [ 0.883882] Unpacking initramfs... [ 2.696429] Initramfs unpacking failed: invalid magic at start of compressed archive This error is ignored if CONFIG_BLK_DEV_RAM=n, but with CONFIG_BLK_DEV_RAM=y the kernel fails to mount rootfs and panics. To fix this issue, shrink initrd_end to remove the trailing bootconfig data while booting the kernel. Link: http://lkml.kernel.org/r/158788401014.24243.17424755854115077915.stgit@devnote2 Cc: Borislav Petkov Cc: Kees Cook Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 7684b8582c24 ("bootconfig: Load boot config from the tail of initrd") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 69 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/init/main.c b/init/main.c index a48617f2e5e5..1a5da2c2660c 100644 --- a/init/main.c +++ b/init/main.c @@ -257,6 +257,47 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); +#ifdef CONFIG_BLK_DEV_INITRD +static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) +{ + u32 size, csum; + char *data; + u32 *hdr; + + if (!initrd_end) + return NULL; + + data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; + if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) + return NULL; + + hdr = (u32 *)(data - 8); + size = hdr[0]; + csum = hdr[1]; + + data = ((void *)hdr) - size; + if ((unsigned long)data < initrd_start) { + pr_err("bootconfig size %d is greater than initrd size %ld\n", + size, initrd_end - initrd_start); + return NULL; + } + + /* Remove bootconfig from
initramfs/initrd */ + initrd_end = (unsigned long)data; + if (_size) + *_size = size; + if (_csum) + *_csum = csum; + + return data; +} +#else +static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) +{ + return NULL; +} +#endif + #ifdef CONFIG_BOOT_CONFIG char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; @@ -357,9 +398,12 @@ static void __init setup_boot_config(const char *cmdline) int pos; u32 size, csum; char *data, *copy; - u32 *hdr; int ret; + data = get_boot_config_from_initrd(&size, &csum); + if (!data) + goto not_found; + strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, bootconfig_params); @@ -367,27 +411,12 @@ static void __init setup_boot_config(const char *cmdline) if (!bootconfig_found) return; - if (!initrd_end) - goto not_found; - - data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; - if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) - goto not_found; - - hdr = (u32 *)(data - 8); - size = hdr[0]; - csum = hdr[1]; - if (size >= XBC_DATA_MAX) { pr_err("bootconfig size %d greater than max size %d\n", size, XBC_DATA_MAX); return; } - data = ((void *)hdr) - size; - if ((unsigned long)data < initrd_start) - goto not_found; - if (boot_config_checksum((unsigned char *)data, size) != csum) { pr_err("bootconfig checksum failed\n"); return; @@ -420,8 +449,14 @@ static void __init setup_boot_config(const char *cmdline) not_found: pr_err("'bootconfig' found on command line, but no bootconfig found\n"); } + #else -#define setup_boot_config(cmdline) do { } while (0) + +static void __init setup_boot_config(const char *cmdline) +{ + /* Remove bootconfig data from initrd */ + get_boot_config_from_initrd(NULL, NULL); +} static int __init warn_bootconfig(char *str) { From dcbd21c9fca5e954fd4e3d91884907eb6d47187e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:09 +0900 Subject: [PATCH 02/10] tracing/kprobes: Fix a double initialization typo Fix a typo that 
resulted in an unnecessary double initialization to addr. Link: http://lkml.kernel.org/r/158779374968.6082.2337484008464939919.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: c7411a1a126f ("tracing/kprobe: Check whether the non-suffixed symbol is notrace") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d0568af4a0ef..0d9300c3b084 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr) static bool within_notrace_func(struct trace_kprobe *tk) { - unsigned long addr = addr = trace_kprobe_address(tk); + unsigned long addr = trace_kprobe_address(tk); char symname[KSYM_NAME_LEN], *p; if (!__within_notrace_func(addr)) From da0f1f4167e3af69e1d8b32d6d65195ddd2bfb64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:17 +0900 Subject: [PATCH 03/10] tracing/boottime: Fix kprobe event API usage Fix boottime kprobe events to use the API correctly for multiple events. For example, when we set multiprobe kprobe events in bootconfig like below, ftrace.event.kprobes.myevent { probes = "vfs_read $arg1 $arg2", "vfs_write $arg1 $arg2" } This causes an error: trace_boot: Failed to add probe: p:kprobes/myevent (null) vfs_read $arg1 $arg2 vfs_write $arg1 $arg2 This shows that the 1st argument becomes NULL and the multiple probes are merged into one probe.
Link: http://lkml.kernel.org/r/158779375766.6082.201939936008972838.stgit@devnote2 Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 29a154810546 ("tracing: Change trace_boot to use kprobe_event interface") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 06d7feb5255f..9de29bb45a27 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -95,23 +95,19 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) struct xbc_node *anode; char buf[MAX_BUF_LEN]; const char *val; - int ret; - - kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); - - ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); - if (ret) - return ret; + int ret = 0; xbc_node_for_each_array_value(node, "probes", anode, val) { - ret = kprobe_event_add_field(&cmd, val); - if (ret) - return ret; - } + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); - ret = kprobe_event_gen_cmd_end(&cmd); - if (ret) - pr_err("Failed to add probe: %s\n", buf); + ret = kprobe_event_gen_cmd_start(&cmd, event, val); + if (ret) + break; + + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + pr_err("Failed to add probe: %s\n", buf); + } return ret; } From 5b4dcd2d201a395ad4054067bfae4a07554fbd65 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:26 +0900 Subject: [PATCH 04/10] tracing/kprobes: Reject new event if loc is NULL Reject the new event which has NULL location for kprobes. For kprobes, user must specify at least the location. 
Link: http://lkml.kernel.org/r/158779376597.6082.1411212055469099461.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 2a588dd1d5d6 ("tracing: Add kprobe event command generation functions") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0d9300c3b084..35989383ae11 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); * complete command or only the first part of it; in the latter case, * kprobe_event_add_fields() can be used to add more fields following this. * + * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This + * returns -EINVAL if @loc == NULL. + * * Return: 0 if successful, error otherwise. */ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, @@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, if (cmd->type != DYNEVENT_TYPE_KPROBE) return -EINVAL; + if (!loc) + return -EINVAL; + if (kretprobe) snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); else From d16a8c31077e75ecb9427fbfea59b74eed00f698 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 May 2020 10:20:10 -0400 Subject: [PATCH 05/10] tracing: Wait for preempt irq delay thread to finish Running on a slower machine, it is possible that the preempt delay kernel thread may still be executing if the module is removed immediately after being added, and this can cause the kernel to crash as the kernel thread might be executing after its code has been removed. There's no reason that the caller of the code shouldn't just wait for the delay thread to finish, as the thread can also be created by a trigger in the sysfs code, which has the same issue.
Link: http://lore.kernel.org/r/5EA2B0C8.2080706@cn.fujitsu.com Cc: stable@vger.kernel.org Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff latency tracers") Reported-by: Xiao Yang Reviewed-by: Xiao Yang Reviewed-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/preemptirq_delay_test.c | 30 ++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 31c0fad4cb9e..c4c86de63cf9 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data) for (i = 0; i < s; i++) (testfuncs[i])(i); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + + __set_current_state(TASK_RUNNING); + return 0; } -static struct task_struct *preemptirq_start_test(void) +static int preemptirq_run_test(void) { + struct task_struct *task; + char task_name[50]; snprintf(task_name, sizeof(task_name), "%s_test", test_mode); - return kthread_run(preemptirq_delay_run, NULL, task_name); + task = kthread_run(preemptirq_delay_run, NULL, task_name); + if (IS_ERR(task)) + return PTR_ERR(task); + if (task) + kthread_stop(task); + return 0; } static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - preemptirq_start_test(); + ssize_t ret; + + ret = preemptirq_run_test(); + if (ret) + return ret; return count; } @@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj; static int __init preemptirq_delay_init(void) { - struct task_struct *test_task; int retval; - test_task = preemptirq_start_test(); - retval = PTR_ERR_OR_ZERO(test_task); + retval = preemptirq_run_test(); if (retval != 0) return retval; From 11f5efc3ab66284f7aaacc926e9351d658e2577b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 
May 2020 10:36:18 -0400 Subject: [PATCH 06/10] tracing: Add a vmalloc_sync_mappings() for safe measure x86_64 lazily maps in the vmalloc pages, and the way this works with per_cpu areas can be complex, to say the least. Mappings may happen at boot up, and if nothing synchronizes the page tables, those page mappings may not be synced till they are used. This causes issues for anything that might touch one of those mappings in the path of the page fault handler. When one of those unmapped mappings is touched in the page fault handler, it will cause another page fault, which in turn will cause a page fault, and leave us in a loop of page faults. Commit 763802b53a42 ("x86/mm: split vmalloc_sync_all()") split vmalloc_sync_all() into vmalloc_sync_unmappings() and vmalloc_sync_mappings(), as on system exit, it did not need to do a full sync on x86_64 (although it still needed to be done on x86_32). By chance, the vmalloc_sync_all() would synchronize the page mappings done at boot up and prevent the per cpu area from being a problem for tracing in the page fault handler. But when that synchronization in the exit of a task became a nop, it caused the problem to appear. 
Link: https://lore.kernel.org/r/20200429054857.66e8e333@oasis.local.home Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: "Tzvetomir Stoyanov (VMware)" Suggested-by: Joerg Roedel Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d2b98812625..9ed6d92768af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8525,6 +8525,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) */ allocate_snapshot = false; #endif + + /* + * Because of some magic with the way alloc_percpu() works on + * x86_64, we need to synchronize the pgd of all the tables, + * otherwise the trace events that happen in x86_64 page fault + * handlers can't cope with accessing the chance that a + * alloc_percpu()'d memory might be touched in the page fault trace + * event. Oh, and we need to audit all other alloc_percpu() and vmalloc() + * calls in tracing, because something might get triggered within a + * page fault trace event! + */ + vmalloc_sync_mappings(); + return 0; } From 386c82a70319d42dba4f1b30e5e7076f2b4d8c2f Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 28 Apr 2020 15:08:25 -0700 Subject: [PATCH 07/10] gpu/trace: Minor comment updates for gpu_mem_total tracepoint This change updates the improper comment for the 'size' attribute in the tracepoint definition. Most gfx drivers pre-fault in physical pages instead of making virtual allocations. So we drop the 'Virtual' keyword here and leave this to the implementations. 
Link: http://lkml.kernel.org/r/20200428220825.169606-1-zzyiwei@google.com Signed-off-by: Yiwei Zhang Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/gpu_mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h index 1897822a9150..26d871f96e94 100644 --- a/include/trace/events/gpu_mem.h +++ b/include/trace/events/gpu_mem.h @@ -24,7 +24,7 @@ * * @pid: Put 0 for global total, while positive pid for process total. * - * @size: Virtual size of the allocation in bytes. + * @size: Size of the allocation in bytes. * */ TRACE_EVENT(gpu_mem_total, From f094a233e1d5b1c61cc797d204aa28b611058827 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 28 Apr 2020 21:49:59 +0000 Subject: [PATCH 08/10] tracing: Fix doc mistakes in trace sample As the example below shows, DECLARE_EVENT_CLASS() is used instead of DEFINE_EVENT_CLASS(). Link: http://lkml.kernel.org/r/20200428214959.11259-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- samples/trace_events/trace-events-sample.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 80b4a70315b6..13a35f7cbe66 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -416,7 +416,7 @@ TRACE_EVENT_FN(foo_bar_with_fn, * Note, TRACE_EVENT() itself is simply defined as: * * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \ - * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ + * DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ * DEFINE_EVENT(name, name, proto, args) * * The DEFINE_EVENT() also can be declared with conditions and reg functions: From 192b7993b3ff92b62b687e940e5e88fa0218d764 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 12:08:25 +0800 Subject: [PATCH 09/10] tracing: Make 
tracing_snapshot_instance_cond() static Fix the following sparse warning: kernel/trace/trace.c:950:6: warning: symbol 'tracing_snapshot_instance_cond' was not declared. Should it be static? Link: http://lkml.kernel.org/r/1587614905-48692-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ed6d92768af..29615f15a820 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) +static void tracing_snapshot_instance_cond(struct trace_array *tr, + void *cond_data) { struct tracer *tracer = tr->current_trace; unsigned long flags; From 8842604446d1f005abcbf8c63c12eabdb5695094 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 7 May 2020 17:23:36 +0800 Subject: [PATCH 10/10] tools/bootconfig: Fix resource leak in apply_xbc() Fix the @data and @fd allocations that are leaked in the error path of apply_xbc(). 
Link: http://lkml.kernel.org/r/583a49c9-c27a-931d-e6c2-6f63a4b18bea@huawei.com Acked-by: Masami Hiramatsu Signed-off-by: Yunfeng Ye Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 16b9a420e6fd..001076c51712 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -314,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path) ret = delete_xbc(path); if (ret < 0) { pr_err("Failed to delete previous boot config: %d\n", ret); + free(data); return ret; } @@ -321,24 +322,26 @@ int apply_xbc(const char *path, const char *xbc_path) fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { pr_err("Failed to open %s: %d\n", path, fd); + free(data); return fd; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); if (ret < 0) { pr_err("Failed to apply a boot config: %d\n", ret); - return ret; + goto out; } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); if (ret < 0) { pr_err("Failed to apply a boot config magic: %d\n", ret); - return ret; + goto out; } +out: close(fd); free(data); - return 0; + return ret; } int usage(void)