Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  dax: move writeback calls into the filesystems
  dax: give DAX clearing code correct bdev
  ext4: online defrag not supported with DAX
  ext2, ext4: only set S_DAX for regular inodes
  block: disable block device DAX by default
  ocfs2: unlock inode if deleting inode from orphan fails
  mm: ASLR: use get_random_long()
  drivers: char: random: add get_random_long()
  mm: numa: quickly fail allocations for NUMA balancing on full nodes
  mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED
This commit is contained in:
commit
691429e13d
|
@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void)
|
|||
{
|
||||
unsigned long rnd;
|
||||
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
|
||||
return rnd << PAGE_SHIFT;
|
||||
}
|
||||
|
|
|
@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)
|
|||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (test_thread_flag(TIF_32BIT))
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
|
||||
else
|
||||
#endif
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
return rnd << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
|
|
|
@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void)
|
|||
{
|
||||
unsigned long rnd;
|
||||
|
||||
rnd = (unsigned long)get_random_int();
|
||||
rnd = get_random_long();
|
||||
rnd <<= PAGE_SHIFT;
|
||||
if (TASK_IS_32BIT_ADDR)
|
||||
rnd &= 0xfffffful;
|
||||
|
@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
|
|||
|
||||
static inline unsigned long brk_rnd(void)
|
||||
{
|
||||
unsigned long rnd = get_random_int();
|
||||
unsigned long rnd = get_random_long();
|
||||
|
||||
rnd = rnd << PAGE_SHIFT;
|
||||
/* 8MB for 32bit, 256MB for 64bit */
|
||||
|
|
|
@ -1768,9 +1768,9 @@ static inline unsigned long brk_rnd(void)
|
|||
|
||||
/* 8MB for 32bit, 1GB for 64bit */
|
||||
if (is_32bit_task())
|
||||
rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
|
||||
rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
|
||||
else
|
||||
rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
|
||||
rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
|
||||
|
||||
return rnd << PAGE_SHIFT;
|
||||
}
|
||||
|
|
|
@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
|
|||
|
||||
/* 8MB for 32bit, 1GB for 64bit */
|
||||
if (is_32bit_task())
|
||||
rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
|
||||
rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
|
||||
else
|
||||
rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
|
||||
rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
|
||||
|
||||
return rnd << PAGE_SHIFT;
|
||||
}
|
||||
|
|
|
@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void)
|
|||
unsigned long rnd = 0UL;
|
||||
|
||||
if (current->flags & PF_RANDOMIZE) {
|
||||
unsigned long val = get_random_int();
|
||||
unsigned long val = get_random_long();
|
||||
if (test_thread_flag(TIF_32BIT))
|
||||
rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
|
||||
else
|
||||
|
|
|
@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)
|
|||
|
||||
if (mmap_is_ia32())
|
||||
#ifdef CONFIG_COMPAT
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
|
||||
#else
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
#endif
|
||||
else
|
||||
rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
|
||||
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
||||
|
||||
return rnd << PAGE_SHIFT;
|
||||
}
|
||||
|
|
|
@ -88,6 +88,19 @@ config BLK_DEV_INTEGRITY
|
|||
T10/SCSI Data Integrity Field or the T13/ATA External Path
|
||||
Protection. If in doubt, say N.
|
||||
|
||||
config BLK_DEV_DAX
|
||||
bool "Block device DAX support"
|
||||
depends on FS_DAX
|
||||
depends on BROKEN
|
||||
help
|
||||
When DAX support is available (CONFIG_FS_DAX) raw block
|
||||
devices can also support direct userspace access to the
|
||||
storage capacity via MMAP(2) similar to a file on a
|
||||
DAX-enabled filesystem. However, the DAX I/O-path disables
|
||||
some standard I/O-statistics, and the MMAP(2) path has some
|
||||
operational differences due to bypassing the page
|
||||
cache. If in doubt, say N.
|
||||
|
||||
config BLK_DEV_THROTTLING
|
||||
bool "Block layer bio throttling support"
|
||||
depends on BLK_CGROUP=y
|
||||
|
|
|
@ -1818,6 +1818,28 @@ unsigned int get_random_int(void)
|
|||
}
|
||||
EXPORT_SYMBOL(get_random_int);
|
||||
|
||||
/*
|
||||
* Same as get_random_int(), but returns unsigned long.
|
||||
*/
|
||||
unsigned long get_random_long(void)
|
||||
{
|
||||
__u32 *hash;
|
||||
unsigned long ret;
|
||||
|
||||
if (arch_get_random_long(&ret))
|
||||
return ret;
|
||||
|
||||
hash = get_cpu_var(get_random_int_hash);
|
||||
|
||||
hash[0] += current->pid + jiffies + random_get_entropy();
|
||||
md5_transform(hash, random_int_secret);
|
||||
ret = *(unsigned long *)hash;
|
||||
put_cpu_var(get_random_int_hash);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(get_random_long);
|
||||
|
||||
/*
|
||||
* randomize_range() returns a start address such that
|
||||
*
|
||||
|
|
|
@ -653,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
|
|||
|
||||
if ((current->flags & PF_RANDOMIZE) &&
|
||||
!(current->personality & ADDR_NO_RANDOMIZE)) {
|
||||
random_variable = (unsigned long) get_random_int();
|
||||
random_variable = get_random_long();
|
||||
random_variable &= STACK_RND_MASK;
|
||||
random_variable <<= PAGE_SHIFT;
|
||||
}
|
||||
|
|
|
@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
|||
bdev->bd_disk = disk;
|
||||
bdev->bd_queue = disk->queue;
|
||||
bdev->bd_contains = bdev;
|
||||
bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
|
||||
bdev->bd_inode->i_flags = S_DAX;
|
||||
else
|
||||
bdev->bd_inode->i_flags = 0;
|
||||
|
||||
if (!partno) {
|
||||
ret = -ENXIO;
|
||||
bdev->bd_part = disk_get_part(disk, partno);
|
||||
|
@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
|
|||
return try_to_free_buffers(page);
|
||||
}
|
||||
|
||||
static int blkdev_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
if (dax_mapping(mapping)) {
|
||||
struct block_device *bdev = I_BDEV(mapping->host);
|
||||
|
||||
return dax_writeback_mapping_range(mapping, bdev, wbc);
|
||||
}
|
||||
return generic_writepages(mapping, wbc);
|
||||
}
|
||||
|
||||
static const struct address_space_operations def_blk_aops = {
|
||||
.readpage = blkdev_readpage,
|
||||
.readpages = blkdev_readpages,
|
||||
.writepage = blkdev_writepage,
|
||||
.write_begin = blkdev_write_begin,
|
||||
.write_end = blkdev_write_end,
|
||||
.writepages = generic_writepages,
|
||||
.writepages = blkdev_writepages,
|
||||
.releasepage = blkdev_releasepage,
|
||||
.direct_IO = blkdev_direct_IO,
|
||||
.is_dirty_writeback = buffer_check_dirty_writeback,
|
||||
|
|
21
fs/dax.c
21
fs/dax.c
|
@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
|
|||
}
|
||||
|
||||
/*
|
||||
* dax_clear_blocks() is called from within transaction context from XFS,
|
||||
* dax_clear_sectors() is called from within transaction context from XFS,
|
||||
* and hence this means the stack from this point must follow GFP_NOFS
|
||||
* semantics for all operations.
|
||||
*/
|
||||
int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
|
||||
int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
|
||||
{
|
||||
struct block_device *bdev = inode->i_sb->s_bdev;
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = block << (inode->i_blkbits - 9),
|
||||
.sector = _sector,
|
||||
.size = _size,
|
||||
};
|
||||
|
||||
|
@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
|
|||
wmb_pmem();
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_clear_blocks);
|
||||
EXPORT_SYMBOL_GPL(dax_clear_sectors);
|
||||
|
||||
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
|
||||
static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
|
||||
|
@ -485,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev,
|
|||
* end]. This is required by data integrity operations to ensure file data is
|
||||
* on persistent storage prior to completion of the operation.
|
||||
*/
|
||||
int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end)
|
||||
int dax_writeback_mapping_range(struct address_space *mapping,
|
||||
struct block_device *bdev, struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct block_device *bdev = inode->i_sb->s_bdev;
|
||||
pgoff_t start_index, end_index, pmd_index;
|
||||
pgoff_t indices[PAGEVEC_SIZE];
|
||||
struct pagevec pvec;
|
||||
|
@ -500,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
|
|||
if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
|
||||
return -EIO;
|
||||
|
||||
start_index = start >> PAGE_CACHE_SHIFT;
|
||||
end_index = end >> PAGE_CACHE_SHIFT;
|
||||
if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
|
||||
return 0;
|
||||
|
||||
start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
|
||||
end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
|
||||
pmd_index = DAX_PMD_INDEX(start_index);
|
||||
|
||||
rcu_read_lock();
|
||||
|
|
|
@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
|
|||
* so that it's not found by another thread before it's
|
||||
* initialised
|
||||
*/
|
||||
err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
|
||||
1 << inode->i_blkbits);
|
||||
err = dax_clear_sectors(inode->i_sb->s_bdev,
|
||||
le32_to_cpu(chain[depth-1].key) <<
|
||||
(inode->i_blkbits - 9),
|
||||
1 << inode->i_blkbits);
|
||||
if (err) {
|
||||
mutex_unlock(&ei->truncate_mutex);
|
||||
goto cleanup;
|
||||
|
@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
|
|||
static int
|
||||
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
||||
{
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (dax_mapping(mapping)) {
|
||||
return dax_writeback_mapping_range(mapping,
|
||||
mapping->host->i_sb->s_bdev,
|
||||
wbc);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mpage_writepages(mapping, wbc, ext2_get_block);
|
||||
}
|
||||
|
||||
|
@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
|
|||
inode->i_flags |= S_NOATIME;
|
||||
if (flags & EXT2_DIRSYNC_FL)
|
||||
inode->i_flags |= S_DIRSYNC;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
|
||||
inode->i_flags |= S_DAX;
|
||||
}
|
||||
|
||||
|
|
|
@ -2478,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
|
|||
|
||||
trace_ext4_writepages(inode, wbc);
|
||||
|
||||
if (dax_mapping(mapping))
|
||||
return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
|
||||
wbc);
|
||||
|
||||
/*
|
||||
* No pages to write? This is mainly a kludge to avoid starting
|
||||
* a transaction for special inodes like journal inode on last iput()
|
||||
|
@ -4155,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
|
|||
new_fl |= S_NOATIME;
|
||||
if (flags & EXT4_DIRSYNC_FL)
|
||||
new_fl |= S_DIRSYNC;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
|
||||
new_fl |= S_DAX;
|
||||
inode_set_flags(inode, new_fl,
|
||||
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
|
||||
|
|
|
@ -583,6 +583,11 @@ group_extend_out:
|
|||
"Online defrag not supported with bigalloc");
|
||||
err = -EOPNOTSUPP;
|
||||
goto mext_out;
|
||||
} else if (IS_DAX(inode)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Online defrag not supported with DAX");
|
||||
err = -EOPNOTSUPP;
|
||||
goto mext_out;
|
||||
}
|
||||
|
||||
err = mnt_want_write_file(filp);
|
||||
|
|
|
@ -956,6 +956,7 @@ clean_orphan:
|
|||
tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
|
||||
update_isize, end);
|
||||
if (tmp_ret < 0) {
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
ret = tmp_ret;
|
||||
mlog_errno(ret);
|
||||
brelse(di_bh);
|
||||
|
|
|
@ -55,7 +55,7 @@ xfs_count_page_state(
|
|||
} while ((bh = bh->b_this_page) != head);
|
||||
}
|
||||
|
||||
STATIC struct block_device *
|
||||
struct block_device *
|
||||
xfs_find_bdev_for_inode(
|
||||
struct inode *inode)
|
||||
{
|
||||
|
@ -1208,6 +1208,10 @@ xfs_vm_writepages(
|
|||
struct writeback_control *wbc)
|
||||
{
|
||||
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
|
||||
if (dax_mapping(mapping))
|
||||
return dax_writeback_mapping_range(mapping,
|
||||
xfs_find_bdev_for_inode(mapping->host), wbc);
|
||||
|
||||
return generic_writepages(mapping, wbc);
|
||||
}
|
||||
|
||||
|
|
|
@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
|
|||
struct buffer_head *map_bh, int create);
|
||||
|
||||
extern void xfs_count_page_state(struct page *, int *, int *);
|
||||
extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
|
||||
|
||||
#endif /* __XFS_AOPS_H__ */
|
||||
|
|
|
@ -75,7 +75,8 @@ xfs_zero_extent(
|
|||
ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
|
||||
|
||||
if (IS_DAX(VFS_I(ip)))
|
||||
return dax_clear_blocks(VFS_I(ip), block, size);
|
||||
return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
|
||||
sector, size);
|
||||
|
||||
/*
|
||||
* let the block layer decide on the fastest method of
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
|
||||
get_block_t, dio_iodone_t, int flags);
|
||||
int dax_clear_blocks(struct inode *, sector_t block, long size);
|
||||
int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
|
||||
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
|
||||
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
|
||||
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
|
||||
|
@ -52,6 +52,8 @@ static inline bool dax_mapping(struct address_space *mapping)
|
|||
{
|
||||
return mapping->host && IS_DAX(mapping->host);
|
||||
}
|
||||
int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end);
|
||||
|
||||
struct writeback_control;
|
||||
int dax_writeback_mapping_range(struct address_space *mapping,
|
||||
struct block_device *bdev, struct writeback_control *wbc);
|
||||
#endif
|
||||
|
|
|
@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops;
|
|||
#endif
|
||||
|
||||
unsigned int get_random_int(void);
|
||||
unsigned long get_random_long(void);
|
||||
unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
|
||||
|
||||
u32 prandom_u32(void);
|
||||
|
|
12
mm/filemap.c
12
mm/filemap.c
|
@ -446,7 +446,8 @@ int filemap_write_and_wait(struct address_space *mapping)
|
|||
{
|
||||
int err = 0;
|
||||
|
||||
if (mapping->nrpages) {
|
||||
if ((!dax_mapping(mapping) && mapping->nrpages) ||
|
||||
(dax_mapping(mapping) && mapping->nrexceptional)) {
|
||||
err = filemap_fdatawrite(mapping);
|
||||
/*
|
||||
* Even if the above returned error, the pages may be
|
||||
|
@ -482,13 +483,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
|
|||
{
|
||||
int err = 0;
|
||||
|
||||
if (dax_mapping(mapping) && mapping->nrexceptional) {
|
||||
err = dax_writeback_mapping_range(mapping, lstart, lend);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (mapping->nrpages) {
|
||||
if ((!dax_mapping(mapping) && mapping->nrpages) ||
|
||||
(dax_mapping(mapping) && mapping->nrexceptional)) {
|
||||
err = __filemap_fdatawrite_range(mapping, lstart, lend,
|
||||
WB_SYNC_ALL);
|
||||
/* See comment of filemap_write_and_wait() */
|
||||
|
|
14
mm/memory.c
14
mm/memory.c
|
@ -3404,8 +3404,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
if (unlikely(pmd_none(*pmd)) &&
|
||||
unlikely(__pte_alloc(mm, vma, pmd, address)))
|
||||
return VM_FAULT_OOM;
|
||||
/* if an huge pmd materialized from under us just retry later */
|
||||
if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
|
||||
/*
|
||||
* If a huge pmd materialized under us just retry later. Use
|
||||
* pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
|
||||
* didn't become pmd_trans_huge under us and then back to pmd_none, as
|
||||
* a result of MADV_DONTNEED running immediately after a huge pmd fault
|
||||
* in a different thread of this mm, in turn leading to a misleading
|
||||
* pmd_trans_huge() retval. All we have to ensure is that it is a
|
||||
* regular pmd that we can walk with pte_offset_map() and we can do that
|
||||
* through an atomic read in C, which is what pmd_trans_unstable()
|
||||
* provides.
|
||||
*/
|
||||
if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
|
||||
return 0;
|
||||
/*
|
||||
* A regular pmd is established and it can't morph into a huge pmd
|
||||
|
|
|
@ -1582,7 +1582,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
|
|||
(GFP_HIGHUSER_MOVABLE |
|
||||
__GFP_THISNODE | __GFP_NOMEMALLOC |
|
||||
__GFP_NORETRY | __GFP_NOWARN) &
|
||||
~(__GFP_IO | __GFP_FS), 0);
|
||||
~__GFP_RECLAIM, 0);
|
||||
|
||||
return newpage;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue