memfd: Convert memfd_wait_for_pins to XArray
Simplify the locking by taking the spinlock while we walk the tree, on the assumption that many acquires and releases of the lock will be worse than holding the lock while we process an entire batch of pages.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
This commit is contained in:
parent
7ae3424fb4
commit
2313216f86
57
mm/memfd.c
57
mm/memfd.c
|
@ -21,7 +21,7 @@
|
||||||
#include <uapi/linux/memfd.h>
|
#include <uapi/linux/memfd.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
|
* We need a tag: a new tag would expand every xa_node by 8 bytes,
|
||||||
* so reuse a tag which we firmly believe is never set or cleared on tmpfs
|
* so reuse a tag which we firmly believe is never set or cleared on tmpfs
|
||||||
* or hugetlbfs because they are memory only filesystems.
|
* or hugetlbfs because they are memory only filesystems.
|
||||||
*/
|
*/
|
||||||
|
@ -72,9 +72,7 @@ static void memfd_tag_pins(struct address_space *mapping)
|
||||||
*/
|
*/
|
||||||
static int memfd_wait_for_pins(struct address_space *mapping)
|
static int memfd_wait_for_pins(struct address_space *mapping)
|
||||||
{
|
{
|
||||||
struct radix_tree_iter iter;
|
XA_STATE(xas, &mapping->i_pages, 0);
|
||||||
void __rcu **slot;
|
|
||||||
pgoff_t start;
|
|
||||||
struct page *page;
|
struct page *page;
|
||||||
int error, scan;
|
int error, scan;
|
||||||
|
|
||||||
|
@ -82,7 +80,9 @@ static int memfd_wait_for_pins(struct address_space *mapping)
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
for (scan = 0; scan <= LAST_SCAN; scan++) {
|
for (scan = 0; scan <= LAST_SCAN; scan++) {
|
||||||
if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
|
unsigned int tagged = 0;
|
||||||
|
|
||||||
|
if (!xas_marked(&xas, MEMFD_TAG_PINNED))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (!scan)
|
if (!scan)
|
||||||
|
@ -90,45 +90,34 @@ static int memfd_wait_for_pins(struct address_space *mapping)
|
||||||
else if (schedule_timeout_killable((HZ << scan) / 200))
|
else if (schedule_timeout_killable((HZ << scan) / 200))
|
||||||
scan = LAST_SCAN;
|
scan = LAST_SCAN;
|
||||||
|
|
||||||
start = 0;
|
xas_set(&xas, 0);
|
||||||
rcu_read_lock();
|
xas_lock_irq(&xas);
|
||||||
radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
|
xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
|
||||||
start, MEMFD_TAG_PINNED) {
|
bool clear = true;
|
||||||
|
if (xa_is_value(page))
|
||||||
page = radix_tree_deref_slot(slot);
|
|
||||||
if (radix_tree_exception(page)) {
|
|
||||||
if (radix_tree_deref_retry(page)) {
|
|
||||||
slot = radix_tree_iter_retry(&iter);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
if (page_count(page) - page_mapcount(page) != 1) {
|
||||||
|
|
||||||
page = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (page &&
|
|
||||||
page_count(page) - page_mapcount(page) != 1) {
|
|
||||||
if (scan < LAST_SCAN)
|
|
||||||
goto continue_resched;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On the last scan, we clean up all those tags
|
* On the last scan, we clean up all those tags
|
||||||
* we inserted; but make a note that we still
|
* we inserted; but make a note that we still
|
||||||
* found pages pinned.
|
* found pages pinned.
|
||||||
*/
|
*/
|
||||||
|
if (scan == LAST_SCAN)
|
||||||
error = -EBUSY;
|
error = -EBUSY;
|
||||||
|
else
|
||||||
|
clear = false;
|
||||||
}
|
}
|
||||||
|
if (clear)
|
||||||
|
xas_clear_mark(&xas, MEMFD_TAG_PINNED);
|
||||||
|
if (++tagged % XA_CHECK_SCHED)
|
||||||
|
continue;
|
||||||
|
|
||||||
xa_lock_irq(&mapping->i_pages);
|
xas_pause(&xas);
|
||||||
radix_tree_tag_clear(&mapping->i_pages,
|
xas_unlock_irq(&xas);
|
||||||
iter.index, MEMFD_TAG_PINNED);
|
cond_resched();
|
||||||
xa_unlock_irq(&mapping->i_pages);
|
xas_lock_irq(&xas);
|
||||||
continue_resched:
|
|
||||||
if (need_resched()) {
|
|
||||||
slot = radix_tree_iter_resume(slot, &iter);
|
|
||||||
cond_resched_rcu();
|
|
||||||
}
|
}
|
||||||
}
|
xas_unlock_irq(&xas);
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
|
|
Loading…
Reference in New Issue