[PATCH] can_share_swap_page: use page_mapcount
Remember that ironic get_user_pages race, when the raised page_count on a page swapped out led do_wp_page to decide that it had to copy on write, and so it substituted a different page into userspace? 2.6.7 onwards have Andrea's solution, where try_to_unmap_one backs out if it finds page_count raised.

That works, but it is unsatisfying (rmap.c has no other page_count heuristics), and a few months ago it was found to hang an intensive page migration test. A year ago I was hesitant to engage page_mapcount; now it seems the right fix.

So remove the page_count hack from try_to_unmap_one, and use activate_page in unuse_mm when dropping the page lock, to replace the hack's secondary effect of helping swapoff make progress in that case.

Simplify can_share_swap_page (now called only on anonymous pages) to check page_mapcount + page_swapcount == 1: it still needs the page lock to stabilize their (pessimistic) sum, but it no longer needs swapper_space.tree_lock for that.

In do_swap_page, move swap_free and unlock_page below page_add_anon_rmap, so that the sum stays on the high side and is correct by the time can_share_swap_page is called.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
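To make the new accounting concrete: for an anonymous page in the swap cache, page_mapcount counts the ptes that map it, and page_swapcount counts the swap references left once the swap cache's own slot is subtracted; the page is exclusively ours only when those two, sampled under the page lock, sum to exactly 1. Below is a minimal userspace sketch of that test; the struct and function names are invented for illustration and none of this is kernel code.

/*
 * Illustrative model only: "struct page_model" and
 * "can_share_swap_page_model" are invented names, not kernel code.
 */
#include <stdio.h>

struct page_model {
	int mapcount;		/* ptes mapping the page (cf. page_mapcount) */
	int swapcount;		/* swap refs beyond the swap cache's own (cf. page_swapcount) */
	int in_swapcache;	/* nonzero while the page sits in the swap cache */
};

/* Share the page in place only if we hold the sole reference. */
static int can_share_swap_page_model(const struct page_model *page)
{
	int count = page->mapcount;

	if (count <= 1 && page->in_swapcache)
		count += page->swapcount;
	return count == 1;
}

int main(void)
{
	/* One pte maps it, no other swap reference left: reuse in place. */
	struct page_model sole = { 1, 0, 1 };
	/* Another task still holds a swapped-out reference: must copy on write. */
	struct page_model shared = { 1, 1, 1 };

	printf("sole   -> can share: %d\n", can_share_swap_page_model(&sole));
	printf("shared -> can share: %d\n", can_share_swap_page_model(&shared));
	return 0;
}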
commit c475a8ab62
parent d296e9cd02

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1686,10 +1686,6 @@ static int do_swap_page(struct mm_struct * mm,
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
 
-	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
-
 	inc_mm_counter(mm, rss);
 	pte = mk_pte(page, vma->vm_page_prot);
@@ -1697,12 +1693,16 @@ static int do_swap_page(struct mm_struct * mm,
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
-	unlock_page(page);
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+	unlock_page(page);
+
 	if (write_access) {
 		if (do_wp_page(mm, vma, address,
 				page_table, pmd, pte) == VM_FAULT_OOM)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -539,27 +539,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		goto out_unmap;
 	}
 
-	/*
-	 * Don't pull an anonymous page out from under get_user_pages.
-	 * GUP carefully breaks COW and raises page count (while holding
-	 * page_table_lock, as we have here) to make sure that the page
-	 * cannot be freed.  If we unmap that page here, a user write
-	 * access to the virtual address will bring back the page, but
-	 * its raised count will (ironically) be taken to mean it's not
-	 * an exclusive swap page, do_wp_page will replace it by a copy
-	 * page, and the user never get to see the data GUP was holding
-	 * the original page for.
-	 *
-	 * This test is also useful for when swapoff (unuse_process) has
-	 * to drop page lock: its reference to the page stops existing
-	 * ptes from being unmapped, so swapoff can make progress.
-	 */
-	if (PageSwapCache(page) &&
-	    page_count(page) != page_mapcount(page) + 2) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
-
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -276,61 +276,37 @@ void swap_free(swp_entry_t entry)
 }
 
 /*
- * Check if we're the only user of a swap page,
- * when the page is locked.
+ * How many references to page are currently swapped out?
  */
-static int exclusive_swap_page(struct page *page)
+static inline int page_swapcount(struct page *page)
 {
-	int retval = 0;
-	struct swap_info_struct * p;
+	int count = 0;
+	struct swap_info_struct *p;
 	swp_entry_t entry;
 
 	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (p) {
-		/* Is the only swap cache user the cache itself? */
-		if (p->swap_map[swp_offset(entry)] == 1) {
-			/* Recheck the page count with the swapcache lock held.. */
-			write_lock_irq(&swapper_space.tree_lock);
-			if (page_count(page) == 2)
-				retval = 1;
-			write_unlock_irq(&swapper_space.tree_lock);
-		}
+		/* Subtract the 1 for the swap cache itself */
+		count = p->swap_map[swp_offset(entry)] - 1;
 		swap_info_put(p);
 	}
-	return retval;
+	return count;
 }
 
 /*
  * We can use this swap cache entry directly
  * if there are no other references to it.
- *
- * Here "exclusive_swap_page()" does the real
- * work, but we opportunistically check whether
- * we need to get all the locks first..
  */
 int can_share_swap_page(struct page *page)
 {
-	int retval = 0;
+	int count;
 
-	if (!PageLocked(page))
-		BUG();
-	switch (page_count(page)) {
-	case 3:
-		if (!PagePrivate(page))
-			break;
-		/* Fallthrough */
-	case 2:
-		if (!PageSwapCache(page))
-			break;
-		retval = exclusive_swap_page(page);
-		break;
-	case 1:
-		if (PageReserved(page))
-			break;
-		retval = 1;
-	}
-	return retval;
+	BUG_ON(!PageLocked(page));
+	count = page_mapcount(page);
+	if (count <= 1 && PageSwapCache(page))
+		count += page_swapcount(page);
+	return count == 1;
 }
 
 /*
@@ -529,9 +505,10 @@ static int unuse_mm(struct mm_struct *mm,
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
-		 * Our reference to the page stops try_to_unmap_one from
-		 * unmapping its ptes, so swapoff can make progress.
+		 * Activate page so shrink_cache is unlikely to unmap its
+		 * ptes while lock is dropped, so swapoff can make progress.
 		 */
+		activate_page(page);
 		unlock_page(page);
 		down_read(&mm->mmap_sem);
 		lock_page(page);