commit a0908a1b7d

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "28 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits)
  fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate()
  mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine
  autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored"
  autofs: revert "autofs: take more care to not update last_used on path walk"
  fs/fat/inode.c: fix sb_rdonly() change
  mm, memcg: fix mem_cgroup_swapout() for THPs
  mm: migrate: fix an incorrect call of prep_transhuge_page()
  kmemleak: add scheduling point to kmemleak_scan()
  scripts/bloat-o-meter: don't fail with division by 0
  fs/mbcache.c: make count_objects() more robust
  Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical"
  mm/madvise.c: fix madvise() infinite loop under special circumstances
  exec: avoid RLIMIT_STACK races with prlimit()
  IB/core: disable memory registration of filesystem-dax vmas
  v4l2: disable filesystem-dax mapping support
  mm: fail get_vaddr_frames() for filesystem-dax mappings
  mm: introduce get_user_pages_longterm
  device-dax: implement ->split() to catch invalid munmap attempts
  mm, hugetlbfs: introduce ->split() to vm_operations_struct
  scripts/faddr2line: extend usage on generic arch
  ...
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
 value lower than this limit will be ignored and the old configuration will be
 retained.
 
-Note: the value of dirty_bytes also must be set greater than
-dirty_background_bytes or the amount of memory corresponding to
-dirty_background_ratio.
-
 ==============================================================
 
 dirty_expire_centisecs
@@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data.
 
 The total available memory is not equal to total system memory.
 
-Note: dirty_ratio must be set greater than dirty_background_ratio or
-ratio corresponding to dirty_background_bytes.
-
 ==============================================================
 
 dirty_writeback_centisecs
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
 }
 #define __HAVE_ARCH_PTE_SPECIAL
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
 #define pud_page(pud)		pmd_page(__pmd(pud_val(pud)))
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		       pmd_t *pmdp, pmd_t pmd);
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_WRITE);
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 #define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
 }
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
@@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud)
 	return pud;
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
+}
+
 static inline pud_t pud_mkclean(pud_t pud)
 {
 	if (pud_large(pud)) {
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return pte_pfn(pte);
 }
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline unsigned long pmd_write(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	if (!(pmd_val(pmd) & _PAGE_VALID))
 		return 0;
 
-	if (write && !pmd_write(pmd))
+	if (!pmd_access_permitted(pmd, write))
 		return 0;
 
 	refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 	if (!(pud_val(pud) & _PAGE_VALID))
 		return 0;
 
-	if (write && !pud_write(pud))
+	if (!pud_access_permitted(pud, write))
 		return 0;
 
 	refs = 0;
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_huge_page(pmd)	pte_huge(pmd_pte(pmd))
 #define pmd_mkhuge(pmd)		pte_pmd(pte_mkhuge(pmd_pte(pmd)))
-#define __HAVE_ARCH_PMD_WRITE
 
 #define pfn_pmd(pfn, pgprot)	pte_pmd(pfn_pte((pfn), (pgprot)))
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 				  unsigned long address, pmd_t *pmdp);
 
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_RW;
@@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 	clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_RW;
+}
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf)
 	return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
 
+static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct file *filp = vma->vm_file;
+	struct dev_dax *dev_dax = filp->private_data;
+	struct dax_region *dax_region = dev_dax->region;
+
+	if (!IS_ALIGNED(addr, dax_region->align))
+		return -EINVAL;
+	return 0;
+}
+
 static const struct vm_operations_struct dax_vm_ops = {
 	.fault = dev_dax_fault,
 	.huge_fault = dev_dax_huge_fault,
+	.split = dev_dax_split,
 };
 
 static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
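The new ->split() hook above is what turns a misaligned unmap of a device-dax mapping into a hard failure instead of a silently broken vma. As a hedged illustration only (the /dev/dax0.0 path and the 2 MiB alignment are assumptions for the example, not part of the patch), a userspace program would now see munmap() fail:

/* Hypothetical userspace demonstration: munmap() at an address that is
 * not aligned to the dax region's alignment must split the vma, and
 * dev_dax_split() rejects that split with EINVAL. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t align = 2UL << 20;		/* assumed 2 MiB region alignment */
	int fd = open("/dev/dax0.0", O_RDWR);	/* assumed device path */
	char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 2 * align, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	/* unmap starting at a misaligned address inside the mapping */
	if (munmap(p + align / 2, align / 2) != 0)
		perror("munmap");		/* expected: EINVAL */
	return 0;
}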
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	sg_list_start = umem->sg_head.sgl;
 
 	while (npages) {
-		ret = get_user_pages(cur_base,
+		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     gup_flags, page_list, vma_list);
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
 		data, size, dma->nr_pages);
 
-	err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+	err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
 			     flags, dma->pages, NULL);
 
 	if (err != dma->nr_pages) {
 		dma->nr_pages = (err >= 0) ? err : 0;
-		dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages);
+		dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+			dma->nr_pages);
 		return err < 0 ? err : -EINVAL;
 	}
 	return 0;
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
 		pr_debug("waiting for mount name=%pd\n", path->dentry);
 		status = autofs4_wait(sbi, path, NFY_MOUNT);
 		pr_debug("mount wait done status=%d\n", status);
-		ino->last_used = jiffies;
 	}
+	ino->last_used = jiffies;
 	return status;
 }
 
@@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new;
 
 		new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
-		if (new == dentry)
-			dput(new);
-		else {
-			struct autofs_info *ino;
-
-			ino = autofs4_dentry_ino(new);
-			ino->last_used = jiffies;
-			dput(path->dentry);
-			path->dentry = new;
-		}
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
+		dput(path->dentry);
+		path->dentry = new;
 	}
 	return path->dentry;
 }
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
 		if (pfn != pmd_pfn(*pmdp))
 			goto unlock_pmd;
-		if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+		if (!pmd_dirty(*pmdp)
+				&& !pmd_access_permitted(*pmdp, WRITE))
 			goto unlock_pmd;
 
 		flush_cache_page(vma, address, pfn);
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm)
 		 * avoid bad behavior from the prior rlimits. This has to
 		 * happen before arch_pick_mmap_layout(), which examines
 		 * RLIMIT_STACK, but after the point of no return to avoid
-		 * needing to clean up the change on failure.
+		 * races from other threads changing the limits. This also
+		 * must be protected from races with prlimit() calls.
 		 */
+		task_lock(current->group_leader);
 		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
 			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+		if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
+			current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
+		task_unlock(current->group_leader);
 	}
 
 	arch_pick_mmap_layout(current->mm);
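For context on the lock being taken above: rlimits are read and updated under task_lock() of the thread-group leader on the prlimit() side as well. The sketch below paraphrases the locking shape of do_prlimit() in kernel/sys.c from memory; it is illustrative, not the verbatim kernel source:

/* Simplified sketch (paraphrased, not part of this diff): readers and
 * writers of signal->rlim serialize on the group leader's task_lock,
 * which is why setup_new_exec() must take the same lock to avoid
 * racing with a concurrent prlimit(). */
static int do_prlimit_sketch(struct task_struct *tsk, unsigned int resource,
			     struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim = tsk->signal->rlim + resource;

	task_lock(tsk->group_leader);	/* serializes with setup_new_exec() */
	if (old_rlim)
		*old_rlim = *rlim;
	if (new_rlim)
		*rlim = *new_rlim;
	task_unlock(tsk->group_leader);
	return 0;
}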
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void)
 
 static int fat_remount(struct super_block *sb, int *flags, char *data)
 {
-	int new_rdonly;
+	bool new_rdonly;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
 
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
 		/*
-		 * page_put due to reference from alloc_huge_page()
 		 * unlock_page because locked by add_to_page_cache()
+		 * page_put due to reference from alloc_huge_page()
 		 */
-		put_page(page);
 		unlock_page(page);
+		put_page(page);
 	}
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
 	struct mb_cache *cache = container_of(shrink, struct mb_cache,
 					      c_shrink);
 
+	/* Unlikely, but not impossible */
+	if (unlikely(cache->c_entry_count < 0))
+		return 0;
 	return cache->c_entry_count;
 }
 
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,
 	 * of the daemon to instantiate them before they can be used.
 	 */
 	if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-			   LOOKUP_OPEN | LOOKUP_CREATE |
-			   LOOKUP_AUTOMOUNT))) {
-		/* Positive dentry that isn't meant to trigger an
-		 * automount, EISDIR will allow it to be used,
-		 * otherwise there's no mount here "now" so return
-		 * ENOENT.
-		 */
-		if (path->dentry->d_inode)
-			return -EISDIR;
-		else
-			return -ENOENT;
-	}
+			   LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+	    path->dentry->d_inode)
+		return -EISDIR;
 
 	if (path->dentry->d_sb->s_user_ns != &init_user_ns)
 		return -EACCES;
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -805,15 +805,23 @@ static inline int pmd_trans_huge(pmd_t pmd)
 {
 	return 0;
 }
-#ifndef __HAVE_ARCH_PMD_WRITE
+#ifndef pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	BUG();
 	return 0;
 }
-#endif /* __HAVE_ARCH_PMD_WRITE */
+#endif /* pmd_write */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifndef pud_write
+static inline int pud_write(pud_t pud)
+{
+	BUG();
+	return 0;
+}
+#endif /* pud_write */
+
 #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
 	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
 	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
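The per-arch hunks earlier and this generic fallback all follow one convention change: instead of advertising a native implementation with the __HAVE_ARCH_PMD_WRITE flag, an architecture now defines pmd_write (or pud_write) to its own name so the generic #ifndef BUG() stub compiles out. A minimal sketch of the idiom, where the bit name _PAGE_ARCH_WRITE is a hypothetical stand-in for a real per-arch permission bit:

/* Hypothetical arch-header sketch, not from this patch: defining
 * pmd_write to its own name is what makes the generic
 * "#ifndef pmd_write" BUG() fallback above drop out at compile time. */
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_ARCH_WRITE);
}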
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat)
 static inline int vfs_fstatat(int dfd, const char __user *filename,
 			      struct kstat *stat, int flags)
 {
-	return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
+	return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
+			 stat, STATX_BASIC_STATS);
 }
 static inline int vfs_fstat(int fd, struct kstat *stat)
 {
@@ -3194,6 +3195,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
 
+static inline bool vma_is_fsdax(struct vm_area_struct *vma)
+{
+	struct inode *inode;
+
+	if (!vma->vm_file)
+		return false;
+	if (!vma_is_dax(vma))
+		return false;
+	inode = file_inode(vma->vm_file);
+	if (inode->i_mode == S_IFCHR)
+		return false; /* device-dax */
+	return true;
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd)
 }
 #endif
 
-#ifndef pud_write
-static inline int pud_write(pud_t pud)
-{
-	BUG();
-	return 0;
-}
-#endif
-
 #define HUGETLB_ANON_FILE	"anon_hugepage"
 
 enum {
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page,
 	new_page = __alloc_pages_nodemask(gfp_mask, order,
 				preferred_nid, nodemask);
 
-	if (new_page && PageTransHuge(page))
+	if (new_page && PageTransHuge(new_page))
 		prep_transhuge_page(new_page);
 
 	return new_page;
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -377,6 +377,7 @@ enum page_entry_size {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
+	int (*split)(struct vm_area_struct * area, unsigned long addr);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_fault *vmf);
 	int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
@@ -1379,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
+#ifdef CONFIG_FS_DAX
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+		    unsigned int gup_flags, struct page **pages,
+		    struct vm_area_struct **vmas);
+#else
+static inline long get_user_pages_longterm(unsigned long start,
+		unsigned long nr_pages, unsigned int gup_flags,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+}
+#endif /* CONFIG_FS_DAX */
+
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 		ret = -EFAULT;
 		goto out;
 	}
+
+	/*
+	 * While get_vaddr_frames() could be used for transient (kernel
+	 * controlled lifetime) pinning of memory pages all current
+	 * users establish long term (userspace controlled lifetime)
+	 * page pinning. Treat get_vaddr_frames() like
+	 * get_user_pages_longterm() and disallow it for filesystem-dax
+	 * mappings.
+	 */
+	if (vma_is_fsdax(vma))
+		return -EOPNOTSUPP;
+
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
 		vec->got_ref = true;
 		vec->is_pfns = false;
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
  */
 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 {
-	return pte_write(pte) ||
+	return pte_access_permitted(pte, WRITE) ||
 		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
 }
 
@@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#ifdef CONFIG_FS_DAX
+/*
+ * This is the same as get_user_pages() in that it assumes we are
+ * operating on the current task's mm, but it goes further to validate
+ * that the vmas associated with the address range are suitable for
+ * longterm elevated page reference counts. For example, filesystem-dax
+ * mappings are subject to the lifetime enforced by the filesystem and
+ * we need guarantees that longterm users like RDMA and V4L2 only
+ * establish mappings that have a kernel enforced revocation mechanism.
+ *
+ * "longterm" == userspace controlled elevated page count lifetime.
+ * Contrast this to iov_iter_get_pages() usages which are transient.
+ */
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+		unsigned int gup_flags, struct page **pages,
+		struct vm_area_struct **vmas_arg)
+{
+	struct vm_area_struct **vmas = vmas_arg;
+	struct vm_area_struct *vma_prev = NULL;
+	long rc, i;
+
+	if (!pages)
+		return -EINVAL;
+
+	if (!vmas) {
+		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
+			       GFP_KERNEL);
+		if (!vmas)
+			return -ENOMEM;
+	}
+
+	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+
+	for (i = 0; i < rc; i++) {
+		struct vm_area_struct *vma = vmas[i];
+
+		if (vma == vma_prev)
+			continue;
+
+		vma_prev = vma;
+
+		if (vma_is_fsdax(vma))
+			break;
+	}
+
+	/*
+	 * Either get_user_pages() failed, or the vma validation
+	 * succeeded, in either case we don't need to put_page() before
+	 * returning.
+	 */
+	if (i >= rc)
+		goto out;
+
+	for (i = 0; i < rc; i++)
+		put_page(pages[i]);
+	rc = -EOPNOTSUPP;
+out:
+	if (vmas != vmas_arg)
+		kfree(vmas);
+	return rc;
+}
+EXPORT_SYMBOL(get_user_pages_longterm);
+#endif /* CONFIG_FS_DAX */
+
 /**
  * populate_vma_page_range() - populate a range of pages in the vma.
  * @vma: target vma
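A hedged sketch of how a long-term pinning caller such as an RDMA or V4L2 driver would be expected to use the new helper; the function name and buffer handling below are illustrative assumptions, not code from this series:

/* Hypothetical caller sketch: pin a user buffer for a long-lived
 * (userspace-controlled lifetime) use. Like get_user_pages(), the
 * helper expects mmap_sem held for read, and it fails the whole call
 * with -EOPNOTSUPP if the range covers a filesystem-dax vma. */
static long pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
			    struct page **pages)
{
	long pinned;

	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages_longterm(uaddr, nr_pages, FOLL_WRITE,
					 pages, NULL);
	up_read(&current->mm->mmap_sem);

	return pinned;	/* caller must put_page() each pinned page */
}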
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
 	if (pmd_protnone(pmd))
 		return hmm_vma_walk_clear(start, end, walk);
 
-	if (write_fault && !pmd_write(pmd))
+	if (!pmd_access_permitted(pmd, write_fault))
 		return hmm_vma_walk_clear(start, end, walk);
 
 	pfn = pmd_pfn(pmd) + pte_index(addr);
-	flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
+	flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
 	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
 		pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
 	return 0;
@@ -456,11 +456,11 @@ again:
 			continue;
 		}
 
-		if (write_fault && !pte_write(pte))
+		if (!pte_access_permitted(pte, write_fault))
 			goto fault;
 
 		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
+		pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
 		continue;
 
 fault:
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	 */
 	WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
 
-	if (flags & FOLL_WRITE && !pmd_write(*pmd))
+	if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
 		return NULL;
 
 	if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pud_lockptr(mm, pud));
 
-	if (flags & FOLL_WRITE && !pud_write(*pud))
+	if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
 		return NULL;
 
 	if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-	return pmd_write(pmd) ||
+	return pmd_access_permitted(pmd, WRITE) ||
 		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
 }
 
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 	}
 }
 
+static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (addr & ~(huge_page_mask(hstate_vma(vma))))
+		return -EINVAL;
+	return 0;
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all. They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
 	.open = hugetlb_vm_op_open,
 	.close = hugetlb_vm_op_close,
+	.split = hugetlb_vm_op_split,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@ -4627,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
-	p4d = p4d_offset(pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
 	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		if (sz == PUD_SIZE) {
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1523,6 +1523,8 @@ static void kmemleak_scan(void)
 			if (page_count(page) == 0)
 				continue;
 			scan_block(page, page + 1, NULL);
+			if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
+				cond_resched();
 		}
 	}
 	put_online_mems();
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma,
 {
 	struct file *file = vma->vm_file;
 
+	*prev = vma;
 #ifdef CONFIG_SWAP
 	if (!file) {
-		*prev = vma;
 		force_swapin_readahead(vma, start, end);
 		return 0;
 	}
 
 	if (shmem_mapping(file->f_mapping)) {
-		*prev = vma;
 		force_shm_swapin_readahead(vma, start, end,
 				file->f_mapping);
 		return 0;
@@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	return 0;
 }
 
-	*prev = vma;
 	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	if (end > vma->vm_end)
 		end = vma->vm_end;
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	memcg_check_events(memcg, page);
 
 	if (!mem_cgroup_is_root(memcg))
-		css_put(&memcg->css);
+		css_put_many(&memcg->css, nr_entries);
 }
 
 /**
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 	if (unlikely(!pte_same(*vmf->pte, entry)))
 		goto unlock;
 	if (vmf->flags & FAULT_FLAG_WRITE) {
-		if (!pte_write(entry))
+		if (!pte_access_permitted(entry, WRITE))
 			return do_wp_page(vmf);
 		entry = pte_mkdirty(entry);
 	}
@@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
 		/* NUMA case for anonymous PUDs would go here */
 
-		if (dirty && !pud_write(orig_pud)) {
+		if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
 			ret = wp_huge_pud(&vmf, orig_pud);
 			if (!(ret & VM_FAULT_FALLBACK))
 				return ret;
@@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
 			return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-		if (dirty && !pmd_write(orig_pmd)) {
+		if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
 			ret = wp_huge_pmd(&vmf, orig_pmd);
 			if (!(ret & VM_FAULT_FALLBACK))
 				return ret;
@@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma,
 		goto out;
 	pte = *ptep;
 
-	if ((flags & FOLL_WRITE) && !pte_write(pte))
+	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
 		goto unlock;
 
 	*prot = pgprot_val(pte_pgprot(pte));
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *new;
 	int err;
 
-	if (is_vm_hugetlb_page(vma) && (addr &
-					~(huge_page_mask(hstate_vma(vma)))))
-		return -EINVAL;
+	if (vma->vm_ops && vma->vm_ops->split) {
+		err = vma->vm_ops->split(vma, addr);
+		if (err)
+			return err;
+	}
 
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	set_bit(MMF_UNSTABLE, &mm->flags);
 
-	tlb_gather_mmu(&tlb, mm, 0, -1);
 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
 		if (!can_madv_dontneed_vma(vma))
 			continue;
@@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 		 * we do not want to block exit_mmap by keeping mm ref
 		 * count elevated without a good reason.
 		 */
-		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
+		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+			tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
 			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
 					 NULL);
+			tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+		}
 	}
-	tlb_finish_mmu(&tlb, 0, -1);
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 			task_pid_nr(tsk), tsk->comm,
 			K(get_mm_counter(mm, MM_ANONPAGES)),
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
 	else
 		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
 
-	if (unlikely(bg_thresh >= thresh)) {
-		pr_warn("vm direct limit must be set greater than background limit.\n");
+	if (bg_thresh >= thresh)
 		bg_thresh = thresh / 2;
-	}
 
 	tsk = current;
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
 		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone)
 	if (WARN_ON_ONCE(!mm_percpu_wq))
 		return;
 
-	/* Workqueues cannot recurse */
-	if (current->flags & PF_WQ_WORKER)
-		return;
-
 	/*
 	 * Do not drain if one is already in progress unless it's specific to
 	 * a zone. Such callers are primarily CMA and memory hotplug and need
@@ -7656,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/*
 	 * In case of -EBUSY, we'd like to know which page causes problem.
-	 * So, just fall through. We will check it in test_pages_isolated().
+	 * So, just fall through. test_pages_isolated() has a tracepoint
+	 * which will report the busy page.
+	 *
+	 * It is possible that busy pages could become available before
+	 * the call to test_pages_isolated, and the range will actually be
+	 * allocated. So, if we fall through be sure to clear ret so that
+	 * -EBUSY is not accidentally used or returned to caller.
 	 */
 	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret && ret != -EBUSY)
 		goto done;
+	ret =0;
 
 	/*
 	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
--- a/scripts/bloat-o-meter
+++ b/scripts/bloat-o-meter
@@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc):
     for d, n in delta:
         if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d))
 
-    print("Total: Before=%d, After=%d, chg %+.2f%%" % \
-        (otot, ntot, (ntot - otot)*100.0/otot))
+    if otot:
+        percent = (ntot - otot) * 100.0 / otot
+    else:
+        percent = 0
+    print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent))
 
 if sys.argv[1] == "-c":
     print_result("Function", "tT", 3)
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -44,9 +44,16 @@
 set -o errexit
 set -o nounset
 
+READELF="${CROSS_COMPILE}readelf"
+ADDR2LINE="${CROSS_COMPILE}addr2line"
+SIZE="${CROSS_COMPILE}size"
+NM="${CROSS_COMPILE}nm"
+
 command -v awk >/dev/null 2>&1 || die "awk isn't installed"
-command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
-command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
+command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
+command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed"
+command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed"
+command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
 
 usage() {
 	echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
@@ -69,10 +76,10 @@ die() {
 find_dir_prefix() {
 	local objfile=$1
 
-	local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+	local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
 	[[ -z $start_kernel_addr ]] && return
 
-	local file_line=$(addr2line -e $objfile $start_kernel_addr)
+	local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
 	[[ -z $file_line ]] && return
 
 	local prefix=${file_line%init/main.c:*}
@@ -104,7 +111,7 @@ __faddr2line() {
 
 	# Go through each of the object's symbols which match the func name.
 	# In rare cases there might be duplicates.
-	file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}')
+	file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}')
 	while read symbol; do
 		local fields=($symbol)
 		local sym_base=0x${fields[0]}
@@ -156,10 +163,10 @@ __faddr2line() {
 
 		# pass real address to addr2line
 		echo "$func+$offset/$sym_size:"
-		addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
+		${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
 		DONE=1
 
-	done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
+	done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
 }
 
 [[ $# -lt 2 ]] && usage