Merge branch 'akpm' (patches from Andrew)
Merge more fixes from Andrew Morton:
 "5 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  swap_readpage(): avoid blk_wake_io_task() if !synchronous
  devres: allow const resource arguments
  mm/vmscan.c: prevent useless kswapd loops
  fs/userfaultfd.c: disable irqs for fault_pending and event locks
  mm/page_alloc.c: fix regression with deferred struct page init
commit a5fff14a0c
fs/userfaultfd.c

@@ -40,6 +40,16 @@ enum userfaultfd_state {
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
  * to be in the same cacheline.
+ *
+ * Locking order:
+ *	fd_wqh.lock
+ *		fault_pending_wqh.lock
+ *			fault_wqh.lock
+ *		event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
  */
 struct userfaultfd_ctx {
 	/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
 			 TASK_KILLABLE;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * __add_wait_queue.
 	 */
 	set_current_state(blocking_state);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	if (!is_vm_hugetlb_page(vmf->vma))
 		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * kernel stack can be released after the list_del_init.
 	 */
 	if (!list_empty_careful(&uwq.wq.entry)) {
-		spin_lock(&ctx->fault_pending_wqh.lock);
+		spin_lock_irq(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
 		list_del(&uwq.wq.entry);
-		spin_unlock(&ctx->fault_pending_wqh.lock);
+		spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 	}
 
 	/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	init_waitqueue_entry(&ewq->wq, current);
 	release_new_ctx = NULL;
 
-	spin_lock(&ctx->event_wqh.lock);
+	spin_lock_irq(&ctx->event_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		}
 
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 
 		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
 		schedule();
 
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 	}
 	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->event_wqh.lock);
+	spin_unlock_irq(&ctx->event_wqh.lock);
 
 	if (release_new_ctx) {
 		struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ wakeup:
 	 * the last page faults that may have been already waiting on
 	 * the fault_*wqh.
 	 */
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
 	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/* Flush pending events that may still wait on event_wqh */
 	wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 		if (!list_empty(&fork_event)) {
 			/*
 			 * The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@
 			if (ret)
 				userfaultfd_ctx_put(fork_nctx);
 		}
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 	}
 
 	return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 static void __wake_userfault(struct userfaultfd_ctx *ctx,
 			     struct userfaultfd_wake_range *range)
 {
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/* wake all in the range and autoremove */
 	if (waitqueue_active(&ctx->fault_pending_wqh))
 		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
 				     range);
 	if (waitqueue_active(&ctx->fault_wqh))
 		__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 }
 
 static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	wait_queue_entry_t *wq;
 	unsigned long pending = 0, total = 0;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		pending++;
 		total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		total++;
 	}
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/*
 	 * If more protocols will be added, there will be all shown
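
The comment block added in the first hunk documents the rule the remaining hunks implement: once fd_wqh.lock can be taken from IRQ context (via aio_poll()), every waitqueue lock above it in the ordering has to be taken with IRQs disabled in process context. A minimal kernel-C sketch of that general pattern, with a made-up lock, list and interrupt handler (not code from this patch; in real code the handler would be wired up with request_irq()):

#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/list.h>

static DEFINE_SPINLOCK(example_lock);   /* hypothetical lock */
static LIST_HEAD(example_items);        /* data it protects */

/* Runs in hardirq context: IRQs are already disabled here, so a plain
 * spin_lock() is enough. */
static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	spin_lock(&example_lock);
	/* ... consume example_items ... */
	spin_unlock(&example_lock);
	return IRQ_HANDLED;
}

/* Runs in process context: must use spin_lock_irq() (or _irqsave), as the
 * patch now does, so the handler above cannot interrupt the critical
 * section on the same CPU and spin on example_lock forever. */
static void example_queue_item(struct list_head *item)
{
	spin_lock_irq(&example_lock);
	list_add_tail(item, &example_items);
	spin_unlock_irq(&example_lock);
}
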
include/linux/device.h

@@ -704,7 +704,8 @@ extern unsigned long devm_get_free_pages(struct device *dev,
 					 gfp_t gfp_mask, unsigned int order);
 extern void devm_free_pages(struct device *dev, unsigned long addr);
 
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
+void __iomem *devm_ioremap_resource(struct device *dev,
+				    const struct resource *res);
 
 void __iomem *devm_of_iomap(struct device *dev,
 			    struct device_node *node, int index,
lib/devres.c

@@ -131,7 +131,8 @@ EXPORT_SYMBOL(devm_iounmap);
  *	if (IS_ERR(base))
  *		return PTR_ERR(base);
  */
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
+void __iomem *devm_ioremap_resource(struct device *dev,
+				    const struct resource *res)
 {
 	resource_size_t size;
 	void __iomem *dest_ptr;
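
The prototype change is source-compatible for existing callers; what it adds is the ability to hand devm_ioremap_resource() a resource object that is itself const. A hypothetical driver fragment (device, address and names invented) showing the kind of caller this enables:

#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/err.h>

/* A fixed, read-only register window; the table can live in rodata. */
static const struct resource example_regs =
	DEFINE_RES_MEM(0xfe000000, 0x1000);

static int example_probe(struct platform_device *pdev)
{
	void __iomem *base;

	/* No cast is needed now that the parameter is const-qualified. */
	base = devm_ioremap_resource(&pdev->dev, &example_regs);
	if (IS_ERR(base))
		return PTR_ERR(base);

	return 0;
}
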
mm/page_alloc.c

@@ -1826,7 +1826,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 						 first_deferred_pfn)) {
 		pgdat->first_deferred_pfn = ULONG_MAX;
 		pgdat_resize_unlock(pgdat, &flags);
-		return true;
+		/* Retry only once. */
+		return first_deferred_pfn != ULONG_MAX;
 	}
 
 	/*
mm/page_io.c (13 lines changed)

@@ -137,8 +137,10 @@ out:
 	unlock_page(page);
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
-	blk_wake_io_task(waiter);
-	put_task_struct(waiter);
+	if (waiter) {
+		blk_wake_io_task(waiter);
+		put_task_struct(waiter);
+	}
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -395,11 +397,12 @@ int swap_readpage(struct page *page, bool synchronous)
 	 * Keep this task valid during swap readpage because the oom killer may
 	 * attempt to access it in the page fault retry time check.
 	 */
-	get_task_struct(current);
-	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-	if (synchronous)
+	if (synchronous) {
 		bio->bi_opf |= REQ_HIPRI;
+		get_task_struct(current);
+		bio->bi_private = current;
+	}
 	count_vm_event(PSWPIN);
 	bio_get(bio);
 	qc = submit_bio(bio);
mm/vmscan.c (27 lines changed)

@@ -3644,19 +3644,18 @@ out:
 }
 
 /*
- * pgdat->kswapd_classzone_idx is the highest zone index that a recent
- * allocation request woke kswapd for. When kswapd has not woken recently,
- * the value is MAX_NR_ZONES which is not a valid index. This compares a
- * given classzone and returns it or the highest classzone index kswapd
- * was recently woke for.
+ * The pgdat->kswapd_classzone_idx is used to pass the highest zone index to be
+ * reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is not
+ * a valid index then either kswapd runs for first time or kswapd couldn't sleep
+ * after previous reclaim attempt (node is still unbalanced). In that case
+ * return the zone index of the previous kswapd reclaim cycle.
  */
 static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
-					   enum zone_type classzone_idx)
+					   enum zone_type prev_classzone_idx)
 {
 	if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
-		return classzone_idx;
-
-	return max(pgdat->kswapd_classzone_idx, classzone_idx);
+		return prev_classzone_idx;
+	return pgdat->kswapd_classzone_idx;
 }
 
 static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
@@ -3797,7 +3796,7 @@ kswapd_try_sleep:
 
 		/* Read the new order and classzone_idx */
 		alloc_order = reclaim_order = pgdat->kswapd_order;
-		classzone_idx = kswapd_classzone_idx(pgdat, 0);
+		classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
 		pgdat->kswapd_order = 0;
 		pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
 
@@ -3851,8 +3850,12 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
 	if (!cpuset_zone_allowed(zone, gfp_flags))
 		return;
 	pgdat = zone->zone_pgdat;
-	pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
-							   classzone_idx);
+
+	if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
+		pgdat->kswapd_classzone_idx = classzone_idx;
+	else
+		pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
+						  classzone_idx);
 	pgdat->kswapd_order = max(pgdat->kswapd_order, order);
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
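
A standalone model of the helper's behaviour change (plain C, userspace, zone numbers invented, MAX_NR_ZONES value a placeholder): with the sentinel stored, the old helper fell back to whatever argument the caller passed (the kswapd loop hunk above used to pass 0), while the new helper falls back to the classzone index remembered from the previous reclaim cycle; and when a real value is stored, it is now returned as-is instead of being max()ed with the argument, since wakeup_kswapd() now does that max() itself before storing (last hunk above):

#include <stdio.h>

#define MAX_NR_ZONES 5	/* placeholder; the real value depends on the config */

/* Old behaviour: max() of the stored value and the caller's argument. */
static int old_kswapd_classzone_idx(int stored, int classzone_idx)
{
	if (stored == MAX_NR_ZONES)
		return classzone_idx;
	return stored > classzone_idx ? stored : classzone_idx;
}

/* New behaviour: the stored value wins; the caller's value is only a
 * fallback for the MAX_NR_ZONES "no recent wakeup" sentinel. */
static int new_kswapd_classzone_idx(int stored, int prev_classzone_idx)
{
	if (stored == MAX_NR_ZONES)
		return prev_classzone_idx;
	return stored;
}

int main(void)
{
	/* kswapd wakes up and finds the sentinel; it last balanced up to
	 * zone 3. The old loop passed 0 and got 0 back, the new loop passes
	 * the remembered index and keeps reclaiming for the same classzone. */
	printf("sentinel: old=%d new=%d\n",
	       old_kswapd_classzone_idx(MAX_NR_ZONES, 0),
	       new_kswapd_classzone_idx(MAX_NR_ZONES, 3));

	/* A wakeup already recorded zone 1; a caller asking about zone 3 no
	 * longer bumps the result inside the helper. */
	printf("stored=1: old=%d new=%d\n",
	       old_kswapd_classzone_idx(1, 3),
	       new_kswapd_classzone_idx(1, 3));
	return 0;
}
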