From 0a27a14a62921b438bb6f33772690d345a089be6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 6 May 2007 14:49:53 -0700 Subject: [PATCH] mm: madvise avoid exclusive mmap_sem Avoid down_write of the mmap_sem in madvise when we can help it. Acked-by: Hugh Dickins Signed-off-by: Nick Piggin Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 603c5257ed6e..e75096b5a6d3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -11,6 +11,24 @@ #include #include +/* + * Any behaviour which results in changes to the vma->vm_flags needs to + * take mmap_sem for writing. Others, which simply traverse vmas, need + * to only take it for reading. + */ +static int madvise_need_mmap_write(int behavior) +{ + switch (behavior) { + case MADV_REMOVE: + case MADV_WILLNEED: + case MADV_DONTNEED: + return 0; + default: + /* be safe, default to 1. list exceptions explicitly */ + return 1; + } +} + /* * We can potentially split a vm area into separate * areas, each area with its own behavior. @@ -183,9 +201,9 @@ static long madvise_remove(struct vm_area_struct *vma, + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ - up_write(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); - down_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); return error; } @@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) int error = -EINVAL; size_t len; - down_write(¤t->mm->mmap_sem); + if (madvise_need_mmap_write(behavior)) + down_write(¤t->mm->mmap_sem); + else + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) vma = find_vma(current->mm, start); } out: - up_write(¤t->mm->mmap_sem); + if (madvise_need_mmap_write(behavior)) + up_write(¤t->mm->mmap_sem); + else + up_read(¤t->mm->mmap_sem); + return error; }