Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "11 fixes. The presence of 'thp: reduce indentation level in
  change_huge_pmd()' is unfortunate. But the patchset had been decently
  reviewed and tested before we decided it was needed in -stable and I
  felt it best not to churn things at the last minute"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Martin Kepplinger's email
  zsmalloc: expand class bit
  zram: do not use copy_page with non-page aligned address
  zram: fix operator precedence to get offset
  hugetlbfs: fix offset overflow in hugetlbfs mmap
  thp: fix MADV_DONTNEED vs clear soft dirty race
  thp: fix MADV_DONTNEED vs. MADV_FREE race
  mm: drop unused pmdp_huge_get_and_clear_notify()
  thp: fix MADV_DONTNEED vs. numa balancing race
  thp: reduce indentation level in change_huge_pmd()
  z3fold: fix page locking in z3fold_alloc()
commit a232591ba2
diff --git a/.mailmap b/.mailmap
@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Mark Brown <broonie@sirena.org.uk>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
 
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
-		copy_page(mem, cmem);
+		memcpy(mem, cmem, PAGE_SIZE);
 	} else {
 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
 
@@ -717,7 +717,7 @@ compress_again:
 
 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
 		src = kmap_atomic(page);
-		copy_page(cmem, src);
+		memcpy(cmem, src, PAGE_SIZE);
 		kunmap_atomic(src);
 	} else {
 		memcpy(cmem, src, clen);
@@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	}
 
 	index = sector >> SECTORS_PER_PAGE_SHIFT;
-	offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
 	bv.bv_page = page;
 	bv.bv_len = PAGE_SIZE;
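Two distinct fixes above. copy_page() assumes page-aligned source and destination on several architectures, while zs_map_object() can return an address inside a zspage that is not page-aligned, so plain memcpy() is the safe call. The offset change is pure operator precedence: in C, << binds tighter than &. A minimal userspace sketch of how the old expression parses (constants chosen for the demo, mirroring 512-byte sectors and 4 KiB pages; not taken from zram itself):

#include <stdio.h>

/* Illustrative values, assumed for this demo. */
#define SECTOR_SHIFT		9
#define SECTORS_PER_PAGE	8	/* 4096 / 512 */

int main(void)
{
	unsigned long sector = 13;	/* any sector not at a page boundary */

	/* Buggy: '<<' binds tighter than '&', so this parses as
	 * sector & ((SECTORS_PER_PAGE - 1) << SECTOR_SHIFT). */
	unsigned long bad  = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;

	/* Fixed: mask off the in-page sector first, then convert to bytes. */
	unsigned long good = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	printf("bad=%lu good=%lu\n", bad, good);	/* prints: bad=0 good=2560 */
	return 0;
}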
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	/*
+	 * Offset passed to mmap (before page shift) could have been
+	 * negative when represented as a (l)off_t.
+	 */
+	if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+		return -EINVAL;
+
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+	/* check for overflow */
+	if (len < vma_len)
+		return -EINVAL;
 
 	inode_lock(inode);
 	file_accessed(file);
 
 	ret = -ENOMEM;
-	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
 	if (hugetlb_reserve_pages(inode,
 			vma->vm_pgoff >> huge_page_order(h),
 			len >> huge_page_shift(h), vma,
@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ret = 0;
 	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
-		inode->i_size = len;
+		i_size_write(inode, len);
 out:
 	inode_unlock(inode);
 
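The new checks guard a signed overflow: vma->vm_pgoff is an unsigned page index, and once shifted into bytes and added to the mapping length as a signed loff_t it can wrap negative, defeating the later size accounting. A self-contained sketch of both rejected cases (4 KiB pages and two's-complement representation assumed, as on Linux targets):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* 4 KiB pages, assumed for the demo */

int main(void)
{
	/* A pgoff large enough that (pgoff << PAGE_SHIFT) sets bit 63. */
	uint64_t pgoff = 0x0008000000000000ULL;
	int64_t vma_len = 2LL << 20;	/* a 2 MiB mapping */

	/* The shifted offset is negative when viewed as a signed loff_t. */
	int64_t off = (int64_t)(pgoff << PAGE_SHIFT);
	if (off < 0)
		puts("offset rejected: negative as loff_t");

	/* Overflow check done in unsigned arithmetic (well-defined in C),
	 * then compared the way the kernel's 'len < vma_len' test does. */
	int64_t len = (int64_t)((uint64_t)vma_len + (uint64_t)off);
	if (len < vma_len)
		puts("length rejected: offset + length wrapped");
	return 0;
}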
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+	pmd_t pmd = *pmdp;
+
+	/* See comment in change_huge_pmd() */
+	pmdp_invalidate(vma, addr, pmdp);
+	if (pmd_dirty(*pmdp))
+		pmd = pmd_mkdirty(pmd);
+	if (pmd_young(*pmdp))
+		pmd = pmd_mkyoung(pmd);
 
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_clear_soft_dirty(pmd);
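This rewrite follows the same pattern as change_huge_pmd() further down: instead of clearing the pmd outright with pmdp_huge_get_and_clear(), which leaves a transient pmd_none() window that a concurrent MADV_DONTNEED can observe and wrongly skip, the entry is copied, invalidated in place, and any dirty/young bits the hardware set in the meantime are folded back in.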
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pud;								\
 })
 
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
-({									\
-	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
-	pmd_t ___pmd;							\
-									\
-	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);	\
-	mmu_notifier_invalidate_range(__mm, ___haddr,			\
-				      ___haddr + HPAGE_PMD_SIZE);	\
-									\
-	___pmd;								\
-})
-
 /*
  * set_pte_at_notify() sets the pte _after_ running the notifier.
  * This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		deactivate_page(page);
 
 	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
-		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-			tlb->fullmm);
+		pmdp_invalidate(vma, addr, pmd);
 		orig_pmd = pmd_mkold(orig_pmd);
 		orig_pmd = pmd_mkclean(orig_pmd);
 
@@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
-	int ret = 0;
+	pmd_t entry;
+	bool preserve_write;
+	int ret;
 
 	ptl = __pmd_trans_huge_lock(pmd, vma);
-	if (ptl) {
-		pmd_t entry;
-		bool preserve_write = prot_numa && pmd_write(*pmd);
-		ret = 1;
+	if (!ptl)
+		return 0;
 
-		/*
-		 * Avoid trapping faults against the zero page. The read-only
-		 * data is likely to be read-cached on the local CPU and
-		 * local/remote hits to the zero page are not interesting.
-		 */
-		if (prot_numa && is_huge_zero_pmd(*pmd)) {
-			spin_unlock(ptl);
-			return ret;
-		}
+	preserve_write = prot_numa && pmd_write(*pmd);
+	ret = 1;
 
-		if (!prot_numa || !pmd_protnone(*pmd)) {
-			entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
-			entry = pmd_modify(entry, newprot);
-			if (preserve_write)
-				entry = pmd_mk_savedwrite(entry);
-			ret = HPAGE_PMD_NR;
-			set_pmd_at(mm, addr, pmd, entry);
-			BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
-					pmd_write(entry));
-		}
-		spin_unlock(ptl);
-	}
+	/*
+	 * Avoid trapping faults against the zero page. The read-only
+	 * data is likely to be read-cached on the local CPU and
+	 * local/remote hits to the zero page are not interesting.
+	 */
+	if (prot_numa && is_huge_zero_pmd(*pmd))
+		goto unlock;
 
+	if (prot_numa && pmd_protnone(*pmd))
+		goto unlock;
+
+	/*
+	 * In case prot_numa, we are under down_read(mmap_sem). It's critical
+	 * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+	 * which is also under down_read(mmap_sem):
+	 *
+	 *	CPU0:				CPU1:
+	 *				change_huge_pmd(prot_numa=1)
+	 *				 pmdp_huge_get_and_clear_notify()
+	 * madvise_dontneed()
+	 *  zap_pmd_range()
+	 *   pmd_trans_huge(*pmd) == 0 (without ptl)
+	 *   // skip the pmd
+	 *				 set_pmd_at();
+	 *				 // pmd is re-established
+	 *
+	 * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+	 * which may break userspace.
+	 *
+	 * pmdp_invalidate() is required to make sure we don't miss
+	 * dirty/young flags set by hardware.
+	 */
+	entry = *pmd;
+	pmdp_invalidate(vma, addr, pmd);
+
+	/*
+	 * Recover dirty/young flags. It relies on pmdp_invalidate to not
+	 * corrupt them.
+	 */
+	if (pmd_dirty(*pmd))
+		entry = pmd_mkdirty(entry);
+	if (pmd_young(*pmd))
+		entry = pmd_mkyoung(entry);
+
+	entry = pmd_modify(entry, newprot);
+	if (preserve_write)
+		entry = pmd_mk_savedwrite(entry);
+	ret = HPAGE_PMD_NR;
+	set_pmd_at(mm, addr, pmd, entry);
+	BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+	spin_unlock(ptl);
+	return ret;
+}
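The comment block above documents the heart of all three THP fixes in this merge: a clear-then-reestablish update makes the entry look empty to a lockless observer. A userspace analogy (pthreads; all names invented for the demo, this is not kernel code) that deterministically schedules the observer inside that window:

#include <pthread.h>
#include <stdio.h>

/* Shared 'entry'; the value 0 plays the role of pmd_none(). */
static volatile long entry = 42;

/* MADV_DONTNEED analogue: an observer that skips "none" entries. */
static void *zapper(void *arg)
{
	(void)arg;
	if (entry == 0)
		puts("zapper: saw transient none, skipped the range (bug!)");
	else
		puts("zapper: cleared the entry");
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* Buggy protocol: clear, compute, re-set. The window between
	 * the two stores is where the race bites. */
	long tmp = entry;
	entry = 0;				/* pmdp_huge_get_and_clear() */
	pthread_create(&t, NULL, zapper, NULL);	/* zap runs in the window */
	pthread_join(t, NULL);
	entry = tmp | 1;			/* set_pmd_at(): re-established */

	printf("final entry=%ld (should have been zapped)\n", entry);
	return 0;
}

The fixed protocol, pmdp_invalidate() plus dirty/young recovery, never lets the entry read as "none", so a concurrent zap always sees something to clear.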
diff --git a/mm/z3fold.c b/mm/z3fold.c
@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
 	spin_lock(&zhdr->page_lock);
 }
 
+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{
+	return spin_trylock(&zhdr->page_lock);
+}
+
 /* Unlock a z3fold page */
 static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
 {
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_lock(&pool->lock);
 			zhdr = list_first_entry_or_null(&pool->unbuddied[i],
 						struct z3fold_header, buddy);
-			if (!zhdr) {
+			if (!zhdr || !z3fold_page_trylock(zhdr)) {
 				spin_unlock(&pool->lock);
 				continue;
 			}
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_unlock(&pool->lock);
 
 			page = virt_to_page(zhdr);
-			z3fold_page_lock(zhdr);
 			if (zhdr->first_chunks == 0) {
 				if (zhdr->middle_chunks != 0 &&
 				    chunks >= zhdr->start_middle)
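The alloc path used to pick a header off the unbuddied list, drop pool->lock, and only then take the page lock, leaving a window for a concurrent free. The fix takes the page lock via trylock while still holding the list lock; trylock keeps the reversed acquisition order from deadlocking, since a busy page is simply skipped. A hedged pthread sketch of that pattern (all names invented for the demo):

#include <pthread.h>
#include <stddef.h>

/* Toy stand-ins for pool->lock and zhdr->page_lock. */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

struct page_hdr {
	pthread_mutex_t page_lock;
	/* ... payload ... */
};

/* Lock the page *while still holding* the pool lock, but only with
 * trylock, so a thread that holds the page lock and is waiting for
 * the pool lock cannot deadlock us. */
static struct page_hdr *grab_page(struct page_hdr *candidate)
{
	pthread_mutex_lock(&pool_lock);
	if (!candidate ||
	    pthread_mutex_trylock(&candidate->page_lock) != 0) {
		/* Busy or gone: skip it, as z3fold_alloc() moves on. */
		pthread_mutex_unlock(&pool_lock);
		return NULL;
	}
	/* Page is locked; now it is safe to unlist it and drop the
	 * pool lock. */
	pthread_mutex_unlock(&pool_lock);
	return candidate;	/* caller unlocks page_lock when done */
}

int main(void)
{
	static struct page_hdr p = { .page_lock = PTHREAD_MUTEX_INITIALIZER };
	struct page_hdr *got = grab_page(&p);

	if (got)
		pthread_mutex_unlock(&got->page_lock);
	return 0;
}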
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
@@ -276,7 +276,7 @@ struct zs_pool {
 struct zspage {
 	struct {
 		unsigned int fullness:FULLNESS_BITS;
-		unsigned int class:CLASS_BITS;
+		unsigned int class:CLASS_BITS + 1;
 		unsigned int isolated:ISOLATED_BITS;
 		unsigned int magic:MAGIC_VAL_BITS;
 	};
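The widened bitfield addresses silent truncation: an index that needs one bit more than the field width wraps modulo 2^width, so two distinct size classes alias the same stored value. A minimal demo of the failure mode (widths 8 and 9 chosen for illustration; the hunk does not show the kernel's actual CLASS_BITS value):

#include <stdio.h>

/* Field widths for illustration only. */
struct zspage_narrow { unsigned int class:8; };
struct zspage_wide   { unsigned int class:9; };

int main(void)
{
	struct zspage_narrow n = { 0 };
	struct zspage_wide   w = { 0 };
	unsigned int idx = 257;	/* a class index needing 9 bits */

	n.class = idx;	/* truncates to 257 % 256 = 1: aliases class 1 */
	w.class = idx;	/* fits */

	/* prints: narrow=1 wide=257 */
	printf("narrow=%u wide=%u\n", (unsigned)n.class, (unsigned)w.class);
	return 0;
}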