Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  MAINTAINERS: add Dan Streetman to zbud maintainers
  MAINTAINERS: add Dan Streetman to zswap maintainers
  mm: do not export ioremap_page_range symbol for external module
  mn10300: fix build error of missing fpu_save()
  romfs: use different way to generate fsid for BLOCK or MTD
  frv: add missing atomic64 operations
  mm, page_alloc: fix premature OOM when racing with cpuset mems update
  mm, page_alloc: move cpuset seqcount checking to slowpath
  mm, page_alloc: fix fast-path race with cpuset update or removal
  mm, page_alloc: fix check for NULL preferred_zone
  kernel/panic.c: add missing \n
  fbdev: color map copying bounds checking
  frv: add atomic64_add_unless()
  mm/mempolicy.c: do not put mempolicy before using its nodemask
  radix-tree: fix private list warnings
  Documentation/filesystems/proc.txt: add VmPin
  mm, memcg: do not retry precharge charges
  proc: add a schedule point in proc_pid_readdir()
  mm: alloc_contig: re-allow CMA to compact FS pages
  mm/slub.c: trace free objects at KERN_INFO
  ...
commit 883af14e67

Documentation/filesystems/proc.txt
@@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise
 snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
 It's slow but very precise.
 
-Table 1-2: Contents of the status files (as of 4.1)
+Table 1-2: Contents of the status files (as of 4.8)
 ..............................................................................
  Field                       Content
  Name                        filename of the executable
+ Umask                       file mode creation mask
  State                       state (R is running, S is sleeping, D is sleeping
                              in an uninterruptible wait, Z is zombie,
                              T is traced or stopped)
@@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1)
  TracerPid                   PID of process tracing this process (0 if not)
  Uid                         Real, effective, saved set, and file system UIDs
  Gid                         Real, effective, saved set, and file system GIDs
- Umask                       file mode creation mask
  FDSize                      number of file descriptor slots currently allocated
  Groups                      supplementary group list
  NStgid                      descendant namespace thread group ID hierarchy
@@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1)
  VmPeak                      peak virtual memory size
  VmSize                      total program size
  VmLck                       locked memory size
+ VmPin                       pinned memory size
  VmHWM                       peak resident set size ("high water mark")
  VmRSS                       size of memory portions. It contains the three
                              following parts (VmRSS = RssAnon + RssFile + RssShmem)

MAINTAINERS
@@ -13625,6 +13625,7 @@ F:	drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
 M:	Seth Jennings <sjenning@redhat.com>
+M:	Dan Streetman <ddstreet@ieee.org>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zbud.c
@@ -13680,6 +13681,7 @@ F:	Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
 M:	Seth Jennings <sjenning@redhat.com>
+M:	Dan Streetman <ddstreet@ieee.org>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zswap.c

arch/frv/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline void atomic64_dec(atomic64_t *v)
 #define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_and_test(v)	(atomic64_inc_return((v)) == 0)
-
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&(v)->counter, old, new))
 #define atomic_xchg(v, new)		(xchg(&(v)->counter, new))
@@ -161,6 +161,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+{
+	long long c, old;
+
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == u))
+			break;
+		old = atomic64_cmpxchg(v, c, c + i);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != u;
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long c, old, dec;
+
+	c = atomic64_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic64_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+
 #define ATOMIC_OP(op) \
 static inline int atomic_fetch_##op(int i, atomic_t *v) \
 { \
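
Illustration (not part of the patch): the cmpxchg loops added above follow the usual add-unless pattern, and atomic64_inc_not_zero() is simply add_unless(v, 1, 0). A minimal userspace sketch of the same loop, written with C11 atomics rather than the kernel's atomic64 API:

    /* Userspace sketch of the add-unless pattern, using C11 atomics
     * instead of the kernel's atomic64_t/atomic64_cmpxchg(). */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool add_unless(_Atomic long long *v, long long a, long long u)
    {
        long long c = atomic_load(v);

        for (;;) {
            if (c == u)
                return false;   /* hit the forbidden value, do nothing */
            /* try to swap in c + a; on failure c is reloaded for us */
            if (atomic_compare_exchange_weak(v, &c, c + a))
                return true;
        }
    }

    int main(void)
    {
        _Atomic long long refs = 1;

        /* "inc_not_zero": take a reference only while the object is live */
        printf("got ref: %d\n", add_unless(&refs, 1, 0));   /* prints 1 */
        atomic_store(&refs, 0);
        printf("got ref: %d\n", add_unless(&refs, 1, 0));   /* prints 0 */
        return 0;
    }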

arch/mn10300/include/asm/switch_to.h
@@ -16,7 +16,7 @@
 struct task_struct;
 struct thread_struct;
 
-#if !defined(CONFIG_LAZY_SAVE_FPU)
+#if defined(CONFIG_FPU) && !defined(CONFIG_LAZY_SAVE_FPU)
 struct fpu_state_struct;
 extern asmlinkage void fpu_save(struct fpu_state_struct *);
 #define switch_fpu(prev, next) \

drivers/base/memory.c
@@ -408,14 +408,14 @@ static ssize_t show_valid_zones(struct device *dev,
 	sprintf(buf, "%s", zone->name);
 
 	/* MMOP_ONLINE_KERNEL */
-	zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL);
+	zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift);
 	if (zone_shift) {
 		strcat(buf, " ");
 		strcat(buf, (zone + zone_shift)->name);
 	}
 
 	/* MMOP_ONLINE_MOVABLE */
-	zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE);
+	zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift);
 	if (zone_shift) {
 		strcat(buf, " ");
 		strcat(buf, (zone + zone_shift)->name);

drivers/memstick/core/memstick.c
@@ -330,7 +330,7 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
 	struct ms_id_register id_reg;
 
 	if (!(*mrq)) {
-		memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL,
+		memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg,
 				  sizeof(struct ms_id_register));
 		*mrq = &card->current_mrq;
 		return 0;

drivers/video/fbdev/core/fbcmap.c
@@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap)
 
 int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
 {
-	int tooff = 0, fromoff = 0;
-	int size;
+	unsigned int tooff = 0, fromoff = 0;
+	size_t size;
 
 	if (to->start > from->start)
 		fromoff = to->start - from->start;
 	else
 		tooff = from->start - to->start;
-	size = to->len - tooff;
-	if (size > (int) (from->len - fromoff))
-		size = from->len - fromoff;
-	if (size <= 0)
+	if (fromoff >= from->len || tooff >= to->len)
+		return -EINVAL;
+
+	size = min_t(size_t, to->len - tooff, from->len - fromoff);
+	if (size == 0)
 		return -EINVAL;
 	size *= sizeof(u16);
 
@@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
 
 int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to)
 {
-	int tooff = 0, fromoff = 0;
-	int size;
+	unsigned int tooff = 0, fromoff = 0;
+	size_t size;
 
 	if (to->start > from->start)
 		fromoff = to->start - from->start;
 	else
 		tooff = from->start - to->start;
-	size = to->len - tooff;
-	if (size > (int) (from->len - fromoff))
-		size = from->len - fromoff;
-	if (size <= 0)
+	if (fromoff >= from->len || tooff >= to->len)
+		return -EINVAL;
+
+	size = min_t(size_t, to->len - tooff, from->len - fromoff);
+	if (size == 0)
 		return -EINVAL;
 	size *= sizeof(u16);
 
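
Illustration (not part of the patch): the old length check mixed signed and unsigned arithmetic, so a huge caller-chosen start offset could slip past it and become a far out-of-bounds copy offset. A standalone sketch with made-up values (not the real struct fb_cmap) contrasts the old and new logic:

    #include <stdint.h>
    #include <stdio.h>

    struct cmap { uint32_t start, len; };

    int main(void)
    {
        struct cmap from = { .start = 0, .len = 256 };
        struct cmap to   = { .start = 0xC0000000u, .len = 16 };

        /* old logic: signed int offsets and size */
        int fromoff = to.start - from.start;    /* becomes negative on typical systems */
        int size = to.len;                      /* tooff == 0 */
        if (size > (int)(from.len - fromoff))   /* unsigned wrap: compares against a huge positive */
            size = from.len - fromoff;
        if (size <= 0)
            printf("old logic: rejected\n");
        else
            printf("old logic: copy %d entries from offset %d (out of bounds)\n",
                   size, fromoff);

        /* new logic: unsigned offsets, explicit bounds check first */
        uint32_t ufromoff = to.start - from.start;
        if (ufromoff >= from.len)
            printf("new logic: rejected (-EINVAL)\n");
        return 0;
    }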

fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select FS_IOMAP
 	help
 	  Direct Access (DAX) can be used on memory-backed block devices.
 	  If the block device supports DAX and the filesystem supports DAX,

fs/dax.c
@@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
-#ifdef CONFIG_FS_IOMAP
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
 	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
 #endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */

fs/ext2/Kconfig
@@ -1,6 +1,5 @@
 config EXT2_FS
 	tristate "Second extended fs support"
-	select FS_IOMAP if FS_DAX
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 

fs/ext4/Kconfig
@@ -37,7 +37,6 @@ config EXT4_FS
 	select CRC16
 	select CRYPTO
 	select CRYPTO_CRC32C
-	select FS_IOMAP if FS_DAX
 	help
 	  This is the next generation of the ext3 filesystem.
 

fs/proc/base.c
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 			iter.tgid += 1, iter = next_tgid(ns, iter)) {
 		char name[PROC_NUMBUF];
 		int len;
+
+		cond_resched();
 		if (!has_pid_permissions(ns, iter.task, 2))
 			continue;
 

fs/romfs/super.c
@@ -74,6 +74,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
+#include <linux/major.h>
 #include "internal.h"
 
 static struct kmem_cache *romfs_inode_cachep;
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode)
 static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
-	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+	u64 id = 0;
+
+	/* When calling huge_encode_dev(),
+	 * use sb->s_bdev->bd_dev when,
+	 *   - CONFIG_ROMFS_ON_BLOCK defined
+	 * use sb->s_dev when,
+	 *   - CONFIG_ROMFS_ON_BLOCK undefined and
+	 *   - CONFIG_ROMFS_ON_MTD defined
+	 * leave id as 0 when,
+	 *   - CONFIG_ROMFS_ON_BLOCK undefined and
+	 *   - CONFIG_ROMFS_ON_MTD undefined
+	 */
+	if (sb->s_bdev)
+		id = huge_encode_dev(sb->s_bdev->bd_dev);
+	else if (sb->s_dev)
+		id = huge_encode_dev(sb->s_dev);
 
 	buf->f_type = ROMFS_MAGIC;
 	buf->f_namelen = ROMFS_MAXFN;
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_flags |= MS_RDONLY | MS_NOATIME;
 	sb->s_op = &romfs_super_ops;
 
+#ifdef CONFIG_ROMFS_ON_MTD
+	/* Use same dev ID from the underlying mtdblock device */
+	if (sb->s_mtd)
+		sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
 	/* read the image superblock and check it */
 	rsb = kmalloc(512, GFP_KERNEL);
 	if (!rsb)

fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
 	wait_queue_t wq;
 	struct userfaultfd_ctx *ctx;
+	bool waken;
 };
 
 struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
 	if (len && (start > uwq->msg.arg.pagefault.address ||
 		    start + len <= uwq->msg.arg.pagefault.address))
 		goto out;
+	WRITE_ONCE(uwq->waken, true);
+	/*
+	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
+	 * renders uwq->waken visible to other CPUs before the task is
+	 * waken.
+	 */
 	ret = wake_up_state(wq->private, mode);
 	if (ret)
 		/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	struct userfaultfd_wait_queue uwq;
 	int ret;
 	bool must_wait, return_to_userland;
+	long blocking_state;
 
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	uwq.wq.private = current;
 	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
 	uwq.ctx = ctx;
+	uwq.waken = false;
 
 	return_to_userland =
 		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+			 TASK_KILLABLE;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-			  TASK_KILLABLE);
+	set_current_state(blocking_state);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 			wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 		ret |= VM_FAULT_MAJOR;
+
+		/*
+		 * False wakeups can orginate even from rwsem before
+		 * up_read() however userfaults will wait either for a
+		 * targeted wakeup on the specific uwq waitqueue from
+		 * wake_userfault() or for signals or for uffd
+		 * release.
+		 */
+		while (!READ_ONCE(uwq.waken)) {
+			/*
+			 * This needs the full smp_store_mb()
+			 * guarantee as the state write must be
+			 * visible to other CPUs before reading
+			 * uwq.waken from other CPUs.
+			 */
+			set_current_state(blocking_state);
+			if (READ_ONCE(uwq.waken) ||
+			    READ_ONCE(ctx->released) ||
+			    (return_to_userland ? signal_pending(current) :
+			     fatal_signal_pending(current)))
+				break;
+			schedule();
+		}
 	}
 
 	__set_current_state(TASK_RUNNING);
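
Illustration (not part of the patch): the uwq.waken re-check loop added above exists because a sleeping task can be woken for reasons unrelated to the userfault (here, rwsem wakeups). The same discipline applies to ordinary condition variables, which may wake spuriously; a minimal pthread sketch of the "always re-test the predicate in a loop" rule:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool waken;                      /* analogue of uwq.waken */

    static void *resolver(void *arg)
    {
        pthread_mutex_lock(&lock);
        waken = true;                       /* publish the real wake condition */
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, resolver, NULL);
        pthread_mutex_lock(&lock);
        while (!waken)                      /* a bare 'if' would trust false wakeups */
            pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        printf("woken for the right reason\n");
        return 0;
    }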

include/linux/memory_hotplug.h
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-			  enum zone_type target);
+extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+			  enum zone_type target, int *zone_shift);
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */

include/linux/mmzone.h
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
  * @zonelist - The zonelist to search for a suitable zone
  * @highest_zoneidx - The zone index of the highest zone to return
  * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
+ * @return - Zoneref pointer for the first suitable zone found (see below)
  *
  * This function returns the first zone at or below a given zone index that is
  * within the allowed nodemask. The zoneref returned is a cursor that can be
  * used to iterate the zonelist with next_zones_zonelist by advancing it by
  * one before calling.
+ *
+ * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
+ * never NULL). This may happen either genuinely, or due to concurrent nodemask
+ * update due to cpuset modification.
  */
 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 					enum zone_type highest_zoneidx,

include/linux/nmi.h
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+extern atomic_t watchdog_park_in_progress;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;

kernel/panic.c
@@ -249,7 +249,7 @@ void panic(const char *fmt, ...)
 	 * Delay timeout seconds before rebooting the machine.
 	 * We can't use the "normal" timers since we just panicked.
 	 */
-	pr_emerg("Rebooting in %d seconds..", panic_timeout);
+	pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
 
 	for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
 		touch_nmi_watchdog();

kernel/watchdog.c
@@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 #define for_each_watchdog_cpu(cpu) \
 	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
 
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	int duration;
 	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
+	if (atomic_read(&watchdog_park_in_progress) != 0)
+		return HRTIMER_NORESTART;
+
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
 
@@ -467,12 +472,16 @@ static int watchdog_park_threads(void)
 {
 	int cpu, ret = 0;
 
+	atomic_set(&watchdog_park_in_progress, 1);
+
 	for_each_watchdog_cpu(cpu) {
 		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
 		if (ret)
 			break;
 	}
 
+	atomic_set(&watchdog_park_in_progress, 0);
+
 	return ret;
 }
 
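
Illustration (not part of the patch): watchdog_park_in_progress acts as a guard flag — while the watchdog threads are being parked, the timer and NMI callbacks bail out early instead of reporting bogus lockups. A generic userspace sketch of the same "suppress callbacks during a maintenance window" pattern, using C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int park_in_progress;

    static void periodic_check(void)
    {
        if (atomic_load(&park_in_progress) != 0)
            return;                     /* measurements are unreliable right now */
        printf("check ran\n");
    }

    static void park_threads(void)
    {
        atomic_store(&park_in_progress, 1);
        /* ... park/unpark worker threads; checks stay quiet meanwhile ... */
        periodic_check();               /* suppressed */
        atomic_store(&park_in_progress, 0);
    }

    int main(void)
    {
        periodic_check();               /* runs */
        park_threads();
        periodic_check();               /* runs again */
        return 0;
    }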

kernel/watchdog_hld.c
@@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
+	if (atomic_read(&watchdog_park_in_progress) != 0)
+		return;
+
 	if (__this_cpu_read(watchdog_nmi_touch) == true) {
 		__this_cpu_write(watchdog_nmi_touch, false);
 		return;

lib/ioremap.c
@@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr,
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(ioremap_page_range);

lib/radix-tree.c
@@ -769,7 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 			struct radix_tree_node *old = child;
 			offset = child->offset + 1;
 			child = child->parent;
-			WARN_ON_ONCE(!list_empty(&node->private_list));
+			WARN_ON_ONCE(!list_empty(&old->private_list));
 			radix_tree_node_free(old);
 			if (old == entry_to_node(node))
 				return;

mm/huge_memory.c
@@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
+	/*
+	 * When we COW a devmap PMD entry, we split it into PTEs, so we should
+	 * not be in this function with `flags & FOLL_COW` set.
+	 */
+	WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
+
 	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		return NULL;
 
@@ -1128,6 +1134,16 @@ out_unlock:
 	return ret;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+{
+	return pmd_write(pmd) ||
+	       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+}
+
 struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 				   unsigned long addr,
 				   pmd_t *pmd,
@@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
-	if (flags & FOLL_WRITE && !pmd_write(*pmd))
+	if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
 		goto out;
 
 	/* Avoid dumping huge zero page */

mm/memcontrol.c
@@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
 		return ret;
 	}
 
-	/* Try charges one by one with reclaim */
+	/* Try charges one by one with reclaim, but do not retry */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
 		if (ret)
 			return ret;
 		mc.precharge++;
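
Illustration (not part of the patch): the memcg fix above is a one-operator change — "GFP_KERNEL & ~__GFP_NORETRY" clears a bit that GFP_KERNEL never sets (a no-op), while "GFP_KERNEL | __GFP_NORETRY" actually sets it. Tiny sketch with made-up flag values, not the real GFP bits:

    #include <stdio.h>

    #define BASE    0x01u
    #define NORETRY 0x10u

    int main(void)
    {
        printf("BASE & ~NORETRY = %#x (NORETRY still clear)\n", BASE & ~NORETRY);
        printf("BASE |  NORETRY = %#x (NORETRY now set)\n", BASE | NORETRY);
        return 0;
    }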

mm/memory_hotplug.c
@@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 	node_set_state(node, N_MEMORY);
 }
 
-int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-		   enum zone_type target)
+bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+		    enum zone_type target, int *zone_shift)
 {
 	struct zone *zone = page_zone(pfn_to_page(pfn));
 	enum zone_type idx = zone_idx(zone);
 	int i;
 
+	*zone_shift = 0;
+
 	if (idx < target) {
 		/* pages must be at end of current zone */
 		if (pfn + nr_pages != zone_end_pfn(zone))
-			return 0;
+			return false;
 
 		/* no zones in use between current zone and target */
 		for (i = idx + 1; i < target; i++)
 			if (zone_is_initialized(zone - idx + i))
-				return 0;
+				return false;
 	}
 
 	if (target < idx) {
 		/* pages must be at beginning of current zone */
 		if (pfn != zone->zone_start_pfn)
-			return 0;
+			return false;
 
 		/* no zones in use between current zone and target */
 		for (i = target + 1; i < idx; i++)
			if (zone_is_initialized(zone - idx + i))
-				return 0;
+				return false;
 	}
 
-	return target - idx;
+	*zone_shift = target - idx;
+	return true;
 }
 
 /* Must be protected by mem_hotplug_begin() */
@@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	    !can_online_high_movable(zone))
 		return -EINVAL;
 
-	if (online_type == MMOP_ONLINE_KERNEL)
-		zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
-	else if (online_type == MMOP_ONLINE_MOVABLE)
-		zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
+	if (online_type == MMOP_ONLINE_KERNEL) {
+		if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
+			return -EINVAL;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
+			return -EINVAL;
+	}
 
 	zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
 	if (!zone)
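
Illustration (not part of the patch): zone_can_shift() was changed because a return value of 0 was ambiguous — "shift by zero" is a legitimate answer, yet callers treated 0 as failure. A generic sketch of the same refactoring to a bool return plus an out-parameter (names are invented):

    #include <stdbool.h>
    #include <stdio.h>

    /* old style: 0 means both "no shift needed" and "not allowed" */
    static int shift_old(int from, int to, bool allowed)
    {
        return allowed ? to - from : 0;
    }

    /* new style: success/failure in the return value, result via out-param */
    static bool shift_new(int from, int to, bool allowed, int *shift)
    {
        *shift = 0;
        if (!allowed)
            return false;
        *shift = to - from;
        return true;
    }

    int main(void)
    {
        int shift;

        /* from == to: the old API looks like a failure even though it is fine */
        printf("old: %d\n", shift_old(2, 2, true));
        printf("new: ok=%d shift=%d\n", shift_new(2, 2, true, &shift), shift);
        return 0;
    }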

mm/mempolicy.c
@@ -2017,8 +2017,8 @@ retry_cpuset:
 
 	nmask = policy_nodemask(gfp, pol);
 	zl = policy_zonelist(gfp, pol, node);
-	mpol_cond_put(pol);
 	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+	mpol_cond_put(pol);
 out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
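
Illustration (not part of the patch): the mempolicy fix is purely an ordering rule — the nodemask handed to the allocator lives inside the policy object, so the reference may be dropped only after the allocation call. A minimal sketch of the same rule with a plain heap object (hypothetical names):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct policy { char nodes[16]; };

    static void use_nodes(const char *nodes)
    {
        printf("allocating from nodes: %s\n", nodes);
    }

    int main(void)
    {
        struct policy *pol = malloc(sizeof(*pol));

        strcpy(pol->nodes, "0-3");

        /* wrong order (the old code): free(pol); use_nodes(pol->nodes); */
        use_nodes(pol->nodes);      /* last use of data owned by pol ... */
        free(pol);                  /* ... only then drop the reference */
        return 0;
    }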

mm/page_alloc.c
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+	/*
+	 * We need to recalculate the starting point for the zonelist iterator
+	 * because we might have used different nodemask in the fast path, or
+	 * there was a cpuset modification and we are retrying - otherwise we
+	 * could end up iterating over non-eligible zones endlessly.
+	 */
+	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+					ac->high_zoneidx, ac->nodemask);
+	if (!ac->preferred_zoneref->zone)
+		goto nopage;
+
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3708,6 +3726,13 @@ retry:
 				&compaction_retries))
 		goto retry;
 
+	/*
+	 * It's possible we raced with cpuset update so the OOM would be
+	 * premature (see below the nopage: label for full explanation).
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	/* Reclaim has failed us, start killing things */
 	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
 	if (page)
@@ -3720,6 +3745,16 @@ retry:
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed or mempolicy nodemask, it is
+	 * possible to race with parallel threads in such a way that our
+	 * allocation can fail while the mask is being updated. If we are about
+	 * to fail, check if the cpuset changed during allocation and if so,
+	 * retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3769,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3771,9 +3805,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3784,8 +3815,13 @@
 	 */
 	ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 					ac.high_zoneidx, ac.nodemask);
-	if (!ac.preferred_zoneref) {
+	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3794,6 +3830,7 @@ retry_cpuset:
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3805,21 +3842,10 @@ retry_cpuset:
 	 * Restore the original nodemask if it was potentially replaced with
 	 * &cpuset_current_mems_allowed to optimize the fast-path attempt.
 	 */
-	if (cpusets_enabled())
+	if (unlikely(ac.nodemask != nodemask))
 		ac.nodemask = nodemask;
-	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
+	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
@@ -7248,6 +7274,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		.zone = page_zone(pfn_to_page(start)),
 		.mode = MIGRATE_SYNC,
 		.ignore_skip_hint = true,
+		.gfp_mask = GFP_KERNEL,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 

mm/slub.c
@@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s,
 	return 1;
 }
 
-static void print_section(char *text, u8 *addr, unsigned int length)
+static void print_section(char *level, char *text, u8 *addr,
+			  unsigned int length)
 {
 	metadata_access_enable();
-	print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
 			length, 1);
 	metadata_access_disable();
 }
@@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 	       p, p - addr, get_freepointer(s, p));
 
 	if (s->flags & SLAB_RED_ZONE)
-		print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+		print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+			      s->red_left_pad);
 	else if (p > addr + 16)
-		print_section("Bytes b4 ", p - 16, 16);
+		print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
 
-	print_section("Object ", p, min_t(unsigned long, s->object_size,
-				PAGE_SIZE));
+	print_section(KERN_ERR, "Object ", p,
+		      min_t(unsigned long, s->object_size, PAGE_SIZE));
 	if (s->flags & SLAB_RED_ZONE)
-		print_section("Redzone ", p + s->object_size,
+		print_section(KERN_ERR, "Redzone ", p + s->object_size,
 			s->inuse - s->object_size);
 
 	if (s->offset)
@@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 
 	if (off != size_from_object(s))
 		/* Beginning of the filler is the free pointer */
-		print_section("Padding ", p + off, size_from_object(s) - off);
+		print_section(KERN_ERR, "Padding ", p + off,
+			      size_from_object(s) - off);
 
 	dump_stack();
 }
@@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 	end--;
 
 	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
-	print_section("Padding ", end - remainder, remainder);
+	print_section(KERN_ERR, "Padding ", end - remainder, remainder);
 
 	restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
 	return 0;
@@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
 			page->freelist);
 
 		if (!alloc)
-			print_section("Object ", (void *)object,
+			print_section(KERN_INFO, "Object ", (void *)object,
 					s->object_size);
 
 		dump_stack();
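
Illustration (not part of the patch): the retry_cpuset logic above is a seqcount retry loop — read_mems_allowed_begin() samples a sequence counter and read_mems_allowed_retry() reports whether the cpuset nodemask changed meanwhile, in which case the failed attempt is redone with a fresh view. A much-simplified userspace sketch of that pattern (not the kernel's seqcount implementation):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static _Atomic unsigned int mems_seq;       /* bumped by "cpuset updates" */

    static unsigned int read_begin(void)        /* ~ read_mems_allowed_begin() */
    {
        return atomic_load(&mems_seq);
    }

    static bool read_retry(unsigned int seq)    /* ~ read_mems_allowed_retry() */
    {
        return atomic_load(&mems_seq) != seq;
    }

    static bool try_alloc(void)
    {
        return false;                           /* pretend the attempt failed */
    }

    int main(void)
    {
        unsigned int seq;
        bool ok;

    retry:
        seq = read_begin();
        ok = try_alloc();
        if (!ok && read_retry(seq))
            goto retry;                         /* mask changed: failure may be spurious */
        printf("allocation %s\n", ok ? "succeeded" : "genuinely failed");
        return 0;
    }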