Fix get_user_pages() race for write access
There's no real guarantee that handle_mm_fault() will always be able to break a COW situation - if an update from another thread ends up modifying the page table some way, handle_mm_fault() may end up requiring us to re-try the operation. That's normally fine, but get_user_pages() ended up re-trying it as a read, and thus a write access could in theory end up losing the dirty bit or be done on a page that had not been properly COW'ed. This makes get_user_pages() always retry write accesses as write accesses by making "follow_page()" require that a writable follow has the dirty bit set. That simplifies the code and solves the race: if the COW break fails for some reason, we'll just loop around and try again. Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
8d894c4797
commit
4ceb5db975
21
mm/memory.c
21
mm/memory.c
|
@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
|
||||||
pte = *ptep;
|
pte = *ptep;
|
||||||
pte_unmap(ptep);
|
pte_unmap(ptep);
|
||||||
if (pte_present(pte)) {
|
if (pte_present(pte)) {
|
||||||
if (write && !pte_write(pte))
|
if (write && !pte_dirty(pte))
|
||||||
goto out;
|
goto out;
|
||||||
if (read && !pte_read(pte))
|
if (read && !pte_read(pte))
|
||||||
goto out;
|
goto out;
|
||||||
pfn = pte_pfn(pte);
|
pfn = pte_pfn(pte);
|
||||||
if (pfn_valid(pfn)) {
|
if (pfn_valid(pfn)) {
|
||||||
page = pfn_to_page(pfn);
|
page = pfn_to_page(pfn);
|
||||||
if (accessed) {
|
if (accessed)
|
||||||
if (write && !pte_dirty(pte) &&!PageDirty(page))
|
|
||||||
set_page_dirty(page);
|
|
||||||
mark_page_accessed(page);
|
mark_page_accessed(page);
|
||||||
}
|
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
do {
|
do {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
int lookup_write = write;
|
|
||||||
|
|
||||||
cond_resched_lock(&mm->page_table_lock);
|
cond_resched_lock(&mm->page_table_lock);
|
||||||
while (!(page = follow_page(mm, start, lookup_write))) {
|
while (!(page = follow_page(mm, start, write))) {
|
||||||
/*
|
/*
|
||||||
* Shortcut for anonymous pages. We don't want
|
* Shortcut for anonymous pages. We don't want
|
||||||
* to force the creation of pages tables for
|
* to force the creation of pages tables for
|
||||||
|
@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
* nobody touched so far. This is important
|
* nobody touched so far. This is important
|
||||||
* for doing a core dump for these mappings.
|
* for doing a core dump for these mappings.
|
||||||
*/
|
*/
|
||||||
if (!lookup_write &&
|
if (!write && untouched_anonymous_page(mm,vma,start)) {
|
||||||
untouched_anonymous_page(mm,vma,start)) {
|
|
||||||
page = ZERO_PAGE(start);
|
page = ZERO_PAGE(start);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
default:
|
default:
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* Now that we have performed a write fault
|
|
||||||
* and surely no longer have a shared page we
|
|
||||||
* shouldn't write, we shouldn't ignore an
|
|
||||||
* unwritable page in the page table if
|
|
||||||
* we are forcing write access.
|
|
||||||
*/
|
|
||||||
lookup_write = write && !force;
|
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
}
|
}
|
||||||
if (pages) {
|
if (pages) {
|
||||||
|
|
Loading…
Reference in New Issue