tracing: Replace kmap with copy_from_user() in trace_marker writing
Instead of using get_user_pages_fast() and kmap_atomic() when writing
to the trace_marker file, just allocate enough space on the ring buffer
directly, and write into it via copy_from_user().
Writing into the trace_marker file use to allocate a temporary buffer
to perform the copy_from_user(), as we didn't want to write into the
ring buffer if the copy failed. But as a trace_marker write is suppose
to be extremely fast, and allocating memory causes other tracepoints to
trigger, Peter Zijlstra suggested using get_user_pages_fast() and
kmap_atomic() to keep the user space pages in memory and reading it
directly. But Henrik Austad had issues with this because it required taking
the mm->mmap_sem and causing long delays with the write.
Instead, just allocate the space in the ring buffer and use
copy_from_user() directly. If it faults, return -EFAULT and write
"<faulted>" into the ring buffer.
Link: http://lkml.kernel.org/r/20161208124018.72dd0f86@gandalf.local.home
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Henrik Austad <henrik@austad.us>
Cc: Peter Zijlstra <peterz@infradead.org>
Updates: d696b58ca2
"tracing: Do not allocate buffer for trace_marker"
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
parent
847fa1a6d3
commit
656c7f0d2d
|
@ -5738,61 +5738,6 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int lock_user_pages(const char __user *ubuf, size_t cnt,
|
|
||||||
struct page **pages, void **map_page,
|
|
||||||
int *offset)
|
|
||||||
{
|
|
||||||
unsigned long addr = (unsigned long)ubuf;
|
|
||||||
int nr_pages = 1;
|
|
||||||
int ret;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Userspace is injecting traces into the kernel trace buffer.
|
|
||||||
* We want to be as non intrusive as possible.
|
|
||||||
* To do so, we do not want to allocate any special buffers
|
|
||||||
* or take any locks, but instead write the userspace data
|
|
||||||
* straight into the ring buffer.
|
|
||||||
*
|
|
||||||
* First we need to pin the userspace buffer into memory,
|
|
||||||
* which, most likely it is, because it just referenced it.
|
|
||||||
* But there's no guarantee that it is. By using get_user_pages_fast()
|
|
||||||
* and kmap_atomic/kunmap_atomic() we can get access to the
|
|
||||||
* pages directly. We then write the data directly into the
|
|
||||||
* ring buffer.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* check if we cross pages */
|
|
||||||
if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
|
|
||||||
nr_pages = 2;
|
|
||||||
|
|
||||||
*offset = addr & (PAGE_SIZE - 1);
|
|
||||||
addr &= PAGE_MASK;
|
|
||||||
|
|
||||||
ret = get_user_pages_fast(addr, nr_pages, 0, pages);
|
|
||||||
if (ret < nr_pages) {
|
|
||||||
while (--ret >= 0)
|
|
||||||
put_page(pages[ret]);
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < nr_pages; i++)
|
|
||||||
map_page[i] = kmap_atomic(pages[i]);
|
|
||||||
|
|
||||||
return nr_pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void unlock_user_pages(struct page **pages,
|
|
||||||
void **map_page, int nr_pages)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = nr_pages - 1; i >= 0; i--) {
|
|
||||||
kunmap_atomic(map_page[i]);
|
|
||||||
put_page(pages[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t
|
static ssize_t
|
||||||
tracing_mark_write(struct file *filp, const char __user *ubuf,
|
tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||||
size_t cnt, loff_t *fpos)
|
size_t cnt, loff_t *fpos)
|
||||||
|
@ -5802,14 +5747,14 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||||
struct ring_buffer *buffer;
|
struct ring_buffer *buffer;
|
||||||
struct print_entry *entry;
|
struct print_entry *entry;
|
||||||
unsigned long irq_flags;
|
unsigned long irq_flags;
|
||||||
struct page *pages[2];
|
const char faulted[] = "<faulted>";
|
||||||
void *map_page[2];
|
|
||||||
int nr_pages = 1;
|
|
||||||
ssize_t written;
|
ssize_t written;
|
||||||
int offset;
|
|
||||||
int size;
|
int size;
|
||||||
int len;
|
int len;
|
||||||
|
|
||||||
|
/* Used in tracing_mark_raw_write() as well */
|
||||||
|
#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
|
||||||
|
|
||||||
if (tracing_disabled)
|
if (tracing_disabled)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
@ -5821,30 +5766,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||||
|
|
||||||
BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
|
BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
|
||||||
|
|
||||||
nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
|
|
||||||
if (nr_pages < 0)
|
|
||||||
return nr_pages;
|
|
||||||
|
|
||||||
local_save_flags(irq_flags);
|
local_save_flags(irq_flags);
|
||||||
size = sizeof(*entry) + cnt + 2; /* possible \n added */
|
size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
|
||||||
|
|
||||||
|
/* If less than "<faulted>", then make sure we can still add that */
|
||||||
|
if (cnt < FAULTED_SIZE)
|
||||||
|
size += FAULTED_SIZE - cnt;
|
||||||
|
|
||||||
buffer = tr->trace_buffer.buffer;
|
buffer = tr->trace_buffer.buffer;
|
||||||
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
|
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
|
||||||
irq_flags, preempt_count());
|
irq_flags, preempt_count());
|
||||||
if (!event) {
|
if (unlikely(!event))
|
||||||
/* Ring buffer disabled, return as if not open for write */
|
/* Ring buffer disabled, return as if not open for write */
|
||||||
written = -EBADF;
|
return -EBADF;
|
||||||
goto out_unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
entry = ring_buffer_event_data(event);
|
entry = ring_buffer_event_data(event);
|
||||||
entry->ip = _THIS_IP_;
|
entry->ip = _THIS_IP_;
|
||||||
|
|
||||||
if (nr_pages == 2) {
|
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
|
||||||
len = PAGE_SIZE - offset;
|
if (len) {
|
||||||
memcpy(&entry->buf, map_page[0] + offset, len);
|
memcpy(&entry->buf, faulted, FAULTED_SIZE);
|
||||||
memcpy(&entry->buf[len], map_page[1], cnt - len);
|
cnt = FAULTED_SIZE;
|
||||||
|
written = -EFAULT;
|
||||||
} else
|
} else
|
||||||
memcpy(&entry->buf, map_page[0] + offset, cnt);
|
written = cnt;
|
||||||
|
len = cnt;
|
||||||
|
|
||||||
if (entry->buf[cnt - 1] != '\n') {
|
if (entry->buf[cnt - 1] != '\n') {
|
||||||
entry->buf[cnt] = '\n';
|
entry->buf[cnt] = '\n';
|
||||||
|
@ -5854,12 +5800,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||||
|
|
||||||
__buffer_unlock_commit(buffer, event);
|
__buffer_unlock_commit(buffer, event);
|
||||||
|
|
||||||
written = cnt;
|
if (written > 0)
|
||||||
|
*fpos += written;
|
||||||
*fpos += written;
|
|
||||||
|
|
||||||
out_unlock:
|
|
||||||
unlock_user_pages(pages, map_page, nr_pages);
|
|
||||||
|
|
||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
|
@ -5875,15 +5817,14 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
|
||||||
struct ring_buffer_event *event;
|
struct ring_buffer_event *event;
|
||||||
struct ring_buffer *buffer;
|
struct ring_buffer *buffer;
|
||||||
struct raw_data_entry *entry;
|
struct raw_data_entry *entry;
|
||||||
|
const char faulted[] = "<faulted>";
|
||||||
unsigned long irq_flags;
|
unsigned long irq_flags;
|
||||||
struct page *pages[2];
|
|
||||||
void *map_page[2];
|
|
||||||
int nr_pages = 1;
|
|
||||||
ssize_t written;
|
ssize_t written;
|
||||||
int offset;
|
|
||||||
int size;
|
int size;
|
||||||
int len;
|
int len;
|
||||||
|
|
||||||
|
#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
|
||||||
|
|
||||||
if (tracing_disabled)
|
if (tracing_disabled)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
@ -5899,38 +5840,32 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
|
||||||
|
|
||||||
BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
|
BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
|
||||||
|
|
||||||
nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
|
|
||||||
if (nr_pages < 0)
|
|
||||||
return nr_pages;
|
|
||||||
|
|
||||||
local_save_flags(irq_flags);
|
local_save_flags(irq_flags);
|
||||||
size = sizeof(*entry) + cnt;
|
size = sizeof(*entry) + cnt;
|
||||||
|
if (cnt < FAULT_SIZE_ID)
|
||||||
|
size += FAULT_SIZE_ID - cnt;
|
||||||
|
|
||||||
buffer = tr->trace_buffer.buffer;
|
buffer = tr->trace_buffer.buffer;
|
||||||
event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
|
event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
|
||||||
irq_flags, preempt_count());
|
irq_flags, preempt_count());
|
||||||
if (!event) {
|
if (!event)
|
||||||
/* Ring buffer disabled, return as if not open for write */
|
/* Ring buffer disabled, return as if not open for write */
|
||||||
written = -EBADF;
|
return -EBADF;
|
||||||
goto out_unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
entry = ring_buffer_event_data(event);
|
entry = ring_buffer_event_data(event);
|
||||||
|
|
||||||
if (nr_pages == 2) {
|
len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
|
||||||
len = PAGE_SIZE - offset;
|
if (len) {
|
||||||
memcpy(&entry->id, map_page[0] + offset, len);
|
entry->id = -1;
|
||||||
memcpy(((char *)&entry->id) + len, map_page[1], cnt - len);
|
memcpy(&entry->buf, faulted, FAULTED_SIZE);
|
||||||
|
written = -EFAULT;
|
||||||
} else
|
} else
|
||||||
memcpy(&entry->id, map_page[0] + offset, cnt);
|
written = cnt;
|
||||||
|
|
||||||
__buffer_unlock_commit(buffer, event);
|
__buffer_unlock_commit(buffer, event);
|
||||||
|
|
||||||
written = cnt;
|
if (written > 0)
|
||||||
|
*fpos += written;
|
||||||
*fpos += written;
|
|
||||||
|
|
||||||
out_unlock:
|
|
||||||
unlock_user_pages(pages, map_page, nr_pages);
|
|
||||||
|
|
||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue