Merge branch 'akpm' (patches from Andrew Morton)
Merge fixes from Andrew Morton: "The nmi patch and watchdog patch aren't actually fixes - they're features which needed a few last-minutes touchups. Otherwise, a rather large batch of fixes - ocfs2 review takes a while and I got distracted and missed last week's batch" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits) ocfs2/dlm: do not purge lockres that is queued for assert master ocfs2: do not return DLM_MIGRATE_RESPONSE_MASTERY_REF to avoid endless,loop during umount ocfs2: manually do the iput once ocfs2_add_entry failed in ocfs2_symlink and ocfs2_mknod ocfs2: fix a tiny race when running dirop_fileop_racer ocfs2/dlm: fix misuse of list_move_tail() in dlm_run_purge_list() ocfs2: refcount: take rw_lock in ocfs2_reflink ocfs2: revert "ocfs2: fix NULL pointer dereference when dismount and ocfs2rec simultaneously" ocfs2: fix deadlock when two nodes are converting same lock from PR to EX and idletimeout closes conn ocfs2: should add inode into orphan dir after updating entry in ocfs2_rename() mm: fix crashes from mbind() merging vmas checkpatch: reduce false positives when checking void function return statements ia64: arch/ia64/include/uapi/asm/fcntl.h needs personality.h DMA, CMA: fix possible memory leak slab: fix oops when reading /proc/slab_allocators shmem: fix faulting into a hole while it's punched mm: let mm_find_pmd fix buggy race with THP fault mm: thp: fix DEBUG_PAGEALLOC oops in copy_page_rep() kernel/watchdog.c: print traces for all cpus on lockup detection nmi: provide the option to issue an NMI back trace to every cpu but current Documentation/accounting/getdelays.c: add missing null-terminate after strncpy call ...
This commit is contained in:
commit
04b5da4a14
4
CREDITS
4
CREDITS
|
@ -9,6 +9,10 @@
|
|||
Linus
|
||||
----------
|
||||
|
||||
M: Matt Mackal
|
||||
E: mpm@selenic.com
|
||||
D: SLOB slab allocator
|
||||
|
||||
N: Matti Aarnio
|
||||
E: mea@nic.funet.fi
|
||||
D: Alpha systems hacking, IPv6 and other network related stuff
|
||||
|
|
|
@ -314,6 +314,7 @@ int main(int argc, char *argv[])
|
|||
break;
|
||||
case 'm':
|
||||
strncpy(cpumask, optarg, sizeof(cpumask));
|
||||
cpumask[sizeof(cpumask) - 1] = '\0';
|
||||
maskset = 1;
|
||||
printf("cpumask %s maskset %d\n", cpumask, maskset);
|
||||
break;
|
||||
|
|
|
@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
[KNL] Should the soft-lockup detector generate panics.
|
||||
Format: <integer>
|
||||
|
||||
softlockup_all_cpu_backtrace=
|
||||
[KNL] Should the soft-lockup detector generate
|
||||
backtraces on all cpus.
|
||||
Format: <integer>
|
||||
|
||||
sonypi.*= [HW] Sony Programmable I/O Control Device driver
|
||||
See Documentation/laptops/sonypi.txt
|
||||
|
||||
|
|
|
@ -209,15 +209,12 @@ If memory device is found, memory hotplug code will be called.
|
|||
|
||||
4.2 Notify memory hot-add event by hand
|
||||
------------
|
||||
On powerpc, the firmware does not notify a memory hotplug event to the kernel.
|
||||
Therefore, "probe" interface is supported to notify the event to the kernel.
|
||||
This interface depends on CONFIG_ARCH_MEMORY_PROBE.
|
||||
|
||||
CONFIG_ARCH_MEMORY_PROBE is supported on powerpc only. On x86, this config
|
||||
option is disabled by default since ACPI notifies a memory hotplug event to
|
||||
the kernel, which performs its hotplug operation as the result. Please
|
||||
enable this option if you need the "probe" interface for testing purposes
|
||||
on x86.
|
||||
On some architectures, the firmware may not notify the kernel of a memory
|
||||
hotplug event. Therefore, the memory "probe" interface is supported to
|
||||
explicitly notify the kernel. This interface depends on
|
||||
CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
|
||||
if hotplug is supported, although for x86 this should be handled by ACPI
|
||||
notification.
|
||||
|
||||
Probe interface is located at
|
||||
/sys/devices/system/memory/probe
|
||||
|
|
|
@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
|
|||
- shmall
|
||||
- shmmax [ sysv ipc ]
|
||||
- shmmni
|
||||
- softlockup_all_cpu_backtrace
|
||||
- stop-a [ SPARC only ]
|
||||
- sysrq ==> Documentation/sysrq.txt
|
||||
- sysctl_writes_strict
|
||||
|
@ -783,6 +784,22 @@ via the /proc/sys interface:
|
|||
|
||||
==============================================================
|
||||
|
||||
softlockup_all_cpu_backtrace:
|
||||
|
||||
This value controls the soft lockup detector thread's behavior
|
||||
when a soft lockup condition is detected as to whether or not
|
||||
to gather further debug information. If enabled, each cpu will
|
||||
be issued an NMI and instructed to capture a stack trace.
|
||||
|
||||
This feature is only applicable for architectures which support
|
||||
NMI.
|
||||
|
||||
0: do nothing. This is the default behavior.
|
||||
|
||||
1: on detection capture more debug information.
|
||||
|
||||
==============================================================
|
||||
|
||||
tainted:
|
||||
|
||||
Non-zero if the kernel has been tainted. Numeric values, which
|
||||
|
|
|
@ -702,7 +702,8 @@ The batch value of each per cpu pagelist is also updated as a result. It is
|
|||
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
|
||||
|
||||
The initial value is zero. The kernel does not use this value at boot time to set
|
||||
the high water marks for each per cpu page list.
|
||||
the high water marks for each per cpu page list. If the user writes '0' to this
|
||||
sysctl, it will revert to this default behavior.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
|
|
@ -8196,13 +8196,15 @@ S: Maintained
|
|||
F: drivers/usb/misc/sisusbvga/
|
||||
|
||||
SLAB ALLOCATOR
|
||||
M: Christoph Lameter <cl@linux-foundation.org>
|
||||
M: Christoph Lameter <cl@linux.com>
|
||||
M: Pekka Enberg <penberg@kernel.org>
|
||||
M: Matt Mackall <mpm@selenic.com>
|
||||
M: David Rientjes <rientjes@google.com>
|
||||
M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
|
||||
M: Andrew Morton <akpm@linux-foundation.org>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: include/linux/sl?b*.h
|
||||
F: mm/sl?b.c
|
||||
F: mm/sl?b*
|
||||
|
||||
SLEEPABLE READ-COPY UPDATE (SRCU)
|
||||
M: Lai Jiangshan <laijs@cn.fujitsu.com>
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#define force_o_largefile() \
|
||||
(personality(current->personality) != PER_LINUX32)
|
||||
|
||||
#include <linux/personality.h>
|
||||
#include <asm-generic/fcntl.h>
|
||||
|
||||
#endif /* _ASM_IA64_FCNTL_H */
|
||||
|
|
|
@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
|
|||
return retval;
|
||||
}
|
||||
|
||||
void arch_trigger_all_cpu_backtrace(void);
|
||||
void arch_trigger_all_cpu_backtrace(bool);
|
||||
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
|
||||
|
||||
extern void *hardirq_stack[NR_CPUS];
|
||||
|
|
|
@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
|
|||
}
|
||||
}
|
||||
|
||||
void arch_trigger_all_cpu_backtrace(void)
|
||||
void arch_trigger_all_cpu_backtrace(bool include_self)
|
||||
{
|
||||
struct thread_info *tp = current_thread_info();
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
|
@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
|
|||
|
||||
spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
|
||||
|
||||
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
|
||||
|
||||
this_cpu = raw_smp_processor_id();
|
||||
|
||||
__global_reg_self(tp, regs, this_cpu);
|
||||
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
|
||||
|
||||
if (include_self)
|
||||
__global_reg_self(tp, regs, this_cpu);
|
||||
|
||||
smp_fetch_global_regs();
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
|
||||
struct global_reg_snapshot *gp;
|
||||
|
||||
if (!include_self && cpu == this_cpu)
|
||||
continue;
|
||||
|
||||
gp = &global_cpu_snapshot[cpu].reg;
|
||||
|
||||
__global_reg_poll(gp);
|
||||
|
||||
|
@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
|
|||
|
||||
static void sysrq_handle_globreg(int key)
|
||||
{
|
||||
arch_trigger_all_cpu_backtrace();
|
||||
arch_trigger_all_cpu_backtrace(true);
|
||||
}
|
||||
|
||||
static struct sysrq_key_op sparc_globalreg_op = {
|
||||
|
|
|
@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
|
|||
extern void init_ISA_irqs(void);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
void arch_trigger_all_cpu_backtrace(void);
|
||||
void arch_trigger_all_cpu_backtrace(bool);
|
||||
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
|
||||
#endif
|
||||
|
||||
|
|
|
@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
|
|||
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
|
||||
static unsigned long backtrace_flag;
|
||||
|
||||
void arch_trigger_all_cpu_backtrace(void)
|
||||
void arch_trigger_all_cpu_backtrace(bool include_self)
|
||||
{
|
||||
int i;
|
||||
int cpu = get_cpu();
|
||||
|
||||
if (test_and_set_bit(0, &backtrace_flag))
|
||||
if (test_and_set_bit(0, &backtrace_flag)) {
|
||||
/*
|
||||
* If there is already a trigger_all_cpu_backtrace() in progress
|
||||
* (backtrace_flag == 1), don't output double cpu dump infos.
|
||||
*/
|
||||
put_cpu();
|
||||
return;
|
||||
}
|
||||
|
||||
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
|
||||
if (!include_self)
|
||||
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
|
||||
|
||||
printk(KERN_INFO "sending NMI to all CPUs:\n");
|
||||
apic->send_IPI_all(NMI_VECTOR);
|
||||
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
|
||||
pr_info("sending NMI to %s CPUs:\n",
|
||||
(include_self ? "all" : "other"));
|
||||
apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
|
||||
}
|
||||
|
||||
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
|
||||
for (i = 0; i < 10 * 1000; i++) {
|
||||
if (cpumask_empty(to_cpumask(backtrace_mask)))
|
||||
break;
|
||||
mdelay(1);
|
||||
touch_softlockup_watchdog();
|
||||
}
|
||||
|
||||
clear_bit(0, &backtrace_flag);
|
||||
smp_mb__after_atomic();
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -176,14 +176,24 @@ static int __init cma_activate_area(struct cma *cma)
|
|||
base_pfn = pfn;
|
||||
for (j = pageblock_nr_pages; j; --j, pfn++) {
|
||||
WARN_ON_ONCE(!pfn_valid(pfn));
|
||||
/*
|
||||
* alloc_contig_range requires the pfn range
|
||||
* specified to be in the same zone. Make this
|
||||
* simple by forcing the entire CMA resv range
|
||||
* to be in the same zone.
|
||||
*/
|
||||
if (page_zone(pfn_to_page(pfn)) != zone)
|
||||
return -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
|
||||
} while (--i);
|
||||
|
||||
mutex_init(&cma->lock);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kfree(cma->bitmap);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct cma cma_areas[MAX_CMA_AREAS];
|
||||
|
|
|
@ -601,6 +601,7 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev)
|
|||
pcr->slots[RTSX_MS_CARD].card_event = NULL;
|
||||
msh = host->msh;
|
||||
host->eject = true;
|
||||
cancel_work_sync(&host->handle_req);
|
||||
|
||||
mutex_lock(&host->host_mutex);
|
||||
if (host->req) {
|
||||
|
|
|
@ -331,6 +331,7 @@ struct dlm_lock_resource
|
|||
u16 state;
|
||||
char lvb[DLM_LVB_LEN];
|
||||
unsigned int inflight_locks;
|
||||
unsigned int inflight_assert_workers;
|
||||
unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
};
|
||||
|
||||
|
@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
|
|||
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
|
|
|
@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
|
|||
atomic_set(&res->asts_reserved, 0);
|
||||
res->migration_pending = 0;
|
||||
res->inflight_locks = 0;
|
||||
res->inflight_assert_workers = 0;
|
||||
|
||||
res->dlm = dlm;
|
||||
|
||||
|
@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
|
|||
wake_up(&res->wq);
|
||||
}
|
||||
|
||||
void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
assert_spin_locked(&res->spinlock);
|
||||
res->inflight_assert_workers++;
|
||||
mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name,
|
||||
res->inflight_assert_workers);
|
||||
}
|
||||
|
||||
static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_lockres_grab_inflight_worker(dlm, res);
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
|
||||
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
assert_spin_locked(&res->spinlock);
|
||||
BUG_ON(res->inflight_assert_workers == 0);
|
||||
res->inflight_assert_workers--;
|
||||
mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name,
|
||||
res->inflight_assert_workers);
|
||||
}
|
||||
|
||||
static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_lockres_drop_inflight_worker(dlm, res);
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup a lock resource by name.
|
||||
* may already exist in the hashtable.
|
||||
|
@ -1603,7 +1641,8 @@ send_response:
|
|||
mlog(ML_ERROR, "failed to dispatch assert master work\n");
|
||||
response = DLM_MASTER_RESP_ERROR;
|
||||
dlm_lockres_put(res);
|
||||
}
|
||||
} else
|
||||
dlm_lockres_grab_inflight_worker(dlm, res);
|
||||
} else {
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
|
|||
dlm_lockres_release_ast(dlm, res);
|
||||
|
||||
put:
|
||||
dlm_lockres_drop_inflight_worker(dlm, res);
|
||||
|
||||
dlm_lockres_put(res);
|
||||
|
||||
mlog(0, "finished with dlm_assert_master_worker\n");
|
||||
|
@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
|
|||
/* remove it so that only one mle will be found */
|
||||
__dlm_unlink_mle(dlm, tmp);
|
||||
__dlm_mle_detach_hb_events(dlm, tmp);
|
||||
ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
|
||||
mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
|
||||
"telling master to get ref for cleared out mle "
|
||||
"during migration\n", dlm->name, namelen, name,
|
||||
master, new_master);
|
||||
if (tmp->type == DLM_MLE_MASTER) {
|
||||
ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
|
||||
mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
|
||||
"telling master to get ref "
|
||||
"for cleared out mle during "
|
||||
"migration\n", dlm->name,
|
||||
namelen, name, master,
|
||||
new_master);
|
||||
}
|
||||
}
|
||||
spin_unlock(&tmp->spinlock);
|
||||
}
|
||||
|
|
|
@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
mlog_errno(-ENOMEM);
|
||||
/* retry!? */
|
||||
BUG();
|
||||
}
|
||||
} else
|
||||
__dlm_lockres_grab_inflight_worker(dlm, res);
|
||||
} else /* put.. incase we are not the master */
|
||||
dlm_lockres_put(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
|
|
@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
|
|||
* refs on it. */
|
||||
unused = __dlm_lockres_unused(lockres);
|
||||
if (!unused ||
|
||||
(lockres->state & DLM_LOCK_RES_MIGRATING)) {
|
||||
(lockres->state & DLM_LOCK_RES_MIGRATING) ||
|
||||
(lockres->inflight_assert_workers != 0)) {
|
||||
mlog(0, "%s: res %.*s is in use or being remastered, "
|
||||
"used %d, state %d\n", dlm->name,
|
||||
lockres->lockname.len, lockres->lockname.name,
|
||||
!unused, lockres->state);
|
||||
list_move_tail(&dlm->purge_list, &lockres->purge);
|
||||
"used %d, state %d, assert master workers %u\n",
|
||||
dlm->name, lockres->lockname.len,
|
||||
lockres->lockname.name,
|
||||
!unused, lockres->state,
|
||||
lockres->inflight_assert_workers);
|
||||
list_move_tail(&lockres->purge, &dlm->purge_list);
|
||||
spin_unlock(&lockres->spinlock);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
|
|||
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
|
||||
} else if (status == DLM_RECOVERING ||
|
||||
status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
status == DLM_FORWARD ||
|
||||
status == DLM_NOLOCKMGR
|
||||
) {
|
||||
/* must clear the actions because this unlock
|
||||
* is about to be retried. cannot free or do
|
||||
* any list manipulation. */
|
||||
|
@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
|
|||
res->lockname.name,
|
||||
status==DLM_RECOVERING?"recovering":
|
||||
(status==DLM_MIGRATING?"migrating":
|
||||
"forward"));
|
||||
(status == DLM_FORWARD ? "forward" :
|
||||
"nolockmanager")));
|
||||
actions = 0;
|
||||
}
|
||||
if (flags & LKM_CANCEL)
|
||||
|
@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
|
|||
* updated state to the recovery master. this thread
|
||||
* just needs to finish out the operation and call
|
||||
* the unlockast. */
|
||||
ret = DLM_NORMAL;
|
||||
if (dlm_is_node_dead(dlm, owner))
|
||||
ret = DLM_NORMAL;
|
||||
else
|
||||
ret = DLM_NOLOCKMGR;
|
||||
} else {
|
||||
/* something bad. this will BUG in ocfs2 */
|
||||
ret = dlm_err_to_dlm_status(tmpret);
|
||||
|
@ -638,7 +644,9 @@ retry:
|
|||
|
||||
if (status == DLM_RECOVERING ||
|
||||
status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
status == DLM_FORWARD ||
|
||||
status == DLM_NOLOCKMGR) {
|
||||
|
||||
/* We want to go away for a tiny bit to allow recovery
|
||||
* / migration to complete on this resource. I don't
|
||||
* know of any wait queue we could sleep on as this
|
||||
|
@ -650,7 +658,7 @@ retry:
|
|||
msleep(50);
|
||||
|
||||
mlog(0, "retrying unlock due to pending recovery/"
|
||||
"migration/in-progress\n");
|
||||
"migration/in-progress/reconnect\n");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
|
145
fs/ocfs2/namei.c
145
fs/ocfs2/namei.c
|
@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
|
|||
return inode;
|
||||
}
|
||||
|
||||
static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
|
||||
struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
|
||||
|
||||
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
|
||||
ocfs2_lock_res_free(&dl->dl_lockres);
|
||||
BUG_ON(dl->dl_count != 1);
|
||||
spin_lock(&dentry_attach_lock);
|
||||
dentry->d_fsdata = NULL;
|
||||
spin_unlock(&dentry_attach_lock);
|
||||
kfree(dl);
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
static int ocfs2_mknod(struct inode *dir,
|
||||
struct dentry *dentry,
|
||||
umode_t mode,
|
||||
|
@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
sigset_t oldset;
|
||||
int did_block_signals = 0;
|
||||
struct posix_acl *default_acl = NULL, *acl = NULL;
|
||||
struct ocfs2_dentry_lock *dl = NULL;
|
||||
|
||||
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
|
||||
(unsigned long long)OCFS2_I(dir)->ip_blkno,
|
||||
|
@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
goto leave;
|
||||
}
|
||||
|
||||
dl = dentry->d_fsdata;
|
||||
|
||||
status = ocfs2_add_entry(handle, dentry, inode,
|
||||
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
|
||||
&lookup);
|
||||
|
@ -469,6 +487,9 @@ leave:
|
|||
* ocfs2_delete_inode will mutex_lock again.
|
||||
*/
|
||||
if ((status < 0) && inode) {
|
||||
if (dl)
|
||||
ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
|
||||
|
||||
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
|
||||
clear_nlink(inode);
|
||||
iput(inode);
|
||||
|
@ -991,6 +1012,65 @@ leave:
|
|||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
|
||||
u64 src_inode_no, u64 dest_inode_no)
|
||||
{
|
||||
int ret = 0, i = 0;
|
||||
u64 parent_inode_no = 0;
|
||||
u64 child_inode_no = src_inode_no;
|
||||
struct inode *child_inode;
|
||||
|
||||
#define MAX_LOOKUP_TIMES 32
|
||||
while (1) {
|
||||
child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
|
||||
if (IS_ERR(child_inode)) {
|
||||
ret = PTR_ERR(child_inode);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ocfs2_inode_lock(child_inode, NULL, 0);
|
||||
if (ret < 0) {
|
||||
iput(child_inode);
|
||||
if (ret != -ENOENT)
|
||||
mlog_errno(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
|
||||
&parent_inode_no);
|
||||
ocfs2_inode_unlock(child_inode, 0);
|
||||
iput(child_inode);
|
||||
if (ret < 0) {
|
||||
ret = -ENOENT;
|
||||
break;
|
||||
}
|
||||
|
||||
if (parent_inode_no == dest_inode_no) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (parent_inode_no == osb->root_inode->i_ino) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
child_inode_no = parent_inode_no;
|
||||
|
||||
if (++i >= MAX_LOOKUP_TIMES) {
|
||||
mlog(ML_NOTICE, "max lookup times reached, filesystem "
|
||||
"may have nested directories, "
|
||||
"src inode: %llu, dest inode: %llu.\n",
|
||||
(unsigned long long)src_inode_no,
|
||||
(unsigned long long)dest_inode_no);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only place this should be used is rename!
|
||||
* if they have the same id, then the 1st one is the only one locked.
|
||||
|
@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
|
|||
struct inode *inode2)
|
||||
{
|
||||
int status;
|
||||
int inode1_is_ancestor, inode2_is_ancestor;
|
||||
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
|
||||
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
|
||||
struct buffer_head **tmpbh;
|
||||
|
@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
|
|||
if (*bh2)
|
||||
*bh2 = NULL;
|
||||
|
||||
/* we always want to lock the one with the lower lockid first. */
|
||||
/* we always want to lock the one with the lower lockid first.
|
||||
* and if they are nested, we lock ancestor first */
|
||||
if (oi1->ip_blkno != oi2->ip_blkno) {
|
||||
if (oi1->ip_blkno < oi2->ip_blkno) {
|
||||
inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
|
||||
oi1->ip_blkno);
|
||||
if (inode1_is_ancestor < 0) {
|
||||
status = inode1_is_ancestor;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
|
||||
oi2->ip_blkno);
|
||||
if (inode2_is_ancestor < 0) {
|
||||
status = inode2_is_ancestor;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if ((inode1_is_ancestor == 1) ||
|
||||
(oi1->ip_blkno < oi2->ip_blkno &&
|
||||
inode2_is_ancestor == 0)) {
|
||||
/* switch id1 and id2 around */
|
||||
tmpbh = bh2;
|
||||
bh2 = bh1;
|
||||
|
@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
|
|||
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
|
||||
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
|
||||
struct ocfs2_dir_lookup_result target_insert = { NULL, };
|
||||
bool should_add_orphan = false;
|
||||
|
||||
/* At some point it might be nice to break this function up a
|
||||
* bit. */
|
||||
|
@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
|
|||
goto bail;
|
||||
}
|
||||
rename_lock = 1;
|
||||
|
||||
/* here we cannot guarantee the inodes haven't just been
|
||||
* changed, so check if they are nested again */
|
||||
status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
|
||||
old_inode->i_ino);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
} else if (status == 1) {
|
||||
status = -EPERM;
|
||||
trace_ocfs2_rename_not_permitted(
|
||||
(unsigned long long)old_inode->i_ino,
|
||||
(unsigned long long)new_dir->i_ino);
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
/* if old and new are the same, this'll just do one lock. */
|
||||
|
@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
|
|||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
should_add_orphan = true;
|
||||
}
|
||||
} else {
|
||||
BUG_ON(new_dentry->d_parent->d_inode != new_dir);
|
||||
|
@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
|
|||
goto bail;
|
||||
}
|
||||
|
||||
if (S_ISDIR(new_inode->i_mode) ||
|
||||
(ocfs2_read_links_count(newfe) == 1)) {
|
||||
status = ocfs2_orphan_add(osb, handle, new_inode,
|
||||
newfe_bh, orphan_name,
|
||||
&orphan_insert, orphan_dir);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
/* change the dirent to point to the correct inode */
|
||||
status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
|
||||
old_inode);
|
||||
|
@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
|
|||
else
|
||||
ocfs2_add_links_count(newfe, -1);
|
||||
ocfs2_journal_dirty(handle, newfe_bh);
|
||||
if (should_add_orphan) {
|
||||
status = ocfs2_orphan_add(osb, handle, new_inode,
|
||||
newfe_bh, orphan_name,
|
||||
&orphan_insert, orphan_dir);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* if the name was not found in new_dir, add it now */
|
||||
status = ocfs2_add_entry(handle, new_dentry, old_inode,
|
||||
|
@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
|
|||
struct ocfs2_dir_lookup_result lookup = { NULL, };
|
||||
sigset_t oldset;
|
||||
int did_block_signals = 0;
|
||||
struct ocfs2_dentry_lock *dl = NULL;
|
||||
|
||||
trace_ocfs2_symlink_begin(dir, dentry, symname,
|
||||
dentry->d_name.len, dentry->d_name.name);
|
||||
|
@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
|
|||
goto bail;
|
||||
}
|
||||
|
||||
dl = dentry->d_fsdata;
|
||||
|
||||
status = ocfs2_add_entry(handle, dentry, inode,
|
||||
le64_to_cpu(fe->i_blkno), parent_fe_bh,
|
||||
&lookup);
|
||||
|
@ -1864,6 +1980,9 @@ bail:
|
|||
if (xattr_ac)
|
||||
ocfs2_free_alloc_context(xattr_ac);
|
||||
if ((status < 0) && inode) {
|
||||
if (dl)
|
||||
ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
|
||||
|
||||
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
|
||||
clear_nlink(inode);
|
||||
iput(inode);
|
||||
|
|
|
@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
|
|||
__entry->new_len, __get_str(new_name))
|
||||
);
|
||||
|
||||
DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
|
||||
|
||||
TRACE_EVENT(ocfs2_rename_target_exists,
|
||||
TP_PROTO(int new_len, const char *new_name),
|
||||
TP_ARGS(new_len, new_name),
|
||||
|
|
|
@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
|
|||
goto out;
|
||||
}
|
||||
|
||||
error = ocfs2_rw_lock(inode, 1);
|
||||
if (error) {
|
||||
mlog_errno(error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = ocfs2_inode_lock(inode, &old_bh, 1);
|
||||
if (error) {
|
||||
mlog_errno(error);
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
|
|||
up_write(&OCFS2_I(inode)->ip_xattr_sem);
|
||||
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
brelse(old_bh);
|
||||
|
||||
if (error) {
|
||||
|
|
|
@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
|
|||
|
||||
ocfs2_shutdown_local_alloc(osb);
|
||||
|
||||
ocfs2_truncate_log_shutdown(osb);
|
||||
|
||||
/* This will disable recovery and flush any recovery work. */
|
||||
ocfs2_recovery_exit(osb);
|
||||
|
||||
/*
|
||||
* During dismount, when it recovers another node it will call
|
||||
* ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
|
||||
*/
|
||||
ocfs2_truncate_log_shutdown(osb);
|
||||
|
||||
ocfs2_journal_shutdown(osb);
|
||||
|
||||
ocfs2_sync_blockdev(sb);
|
||||
|
|
|
@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
|
|||
#ifdef arch_trigger_all_cpu_backtrace
|
||||
static inline bool trigger_all_cpu_backtrace(void)
|
||||
{
|
||||
arch_trigger_all_cpu_backtrace();
|
||||
arch_trigger_all_cpu_backtrace(true);
|
||||
|
||||
return true;
|
||||
}
|
||||
static inline bool trigger_allbutself_cpu_backtrace(void)
|
||||
{
|
||||
arch_trigger_all_cpu_backtrace(false);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static inline bool trigger_all_cpu_backtrace(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool trigger_allbutself_cpu_backtrace(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LOCKUP_DETECTOR
|
||||
|
@ -48,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
|
|||
u64 hw_nmi_get_sample_period(int watchdog_thresh);
|
||||
extern int watchdog_user_enabled;
|
||||
extern int watchdog_thresh;
|
||||
extern int sysctl_softlockup_all_cpu_backtrace;
|
||||
struct ctl_table;
|
||||
extern int proc_dowatchdog(struct ctl_table *, int ,
|
||||
void __user *, size_t *, loff_t *);
|
||||
|
|
|
@ -360,6 +360,9 @@ static inline void ClearPageCompound(struct page *page)
|
|||
ClearPageHead(page);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define PG_head_mask ((1L << PG_head))
|
||||
|
||||
#else
|
||||
/*
|
||||
* Reduce page flag use as much as possible by overlapping
|
||||
|
|
|
@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
|
|||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
VMCOREINFO_NUMBER(PG_hwpoison);
|
||||
#endif
|
||||
VMCOREINFO_NUMBER(PG_head_mask);
|
||||
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
|
||||
|
||||
arch_crash_save_vmcoreinfo();
|
||||
|
|
57
kernel/smp.c
57
kernel/smp.c
|
@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
|
|||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
|
||||
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline);
|
||||
|
||||
static int
|
||||
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
{
|
||||
|
@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
|||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
/* Fall-through to the CPU_DEAD[_FROZEN] case. */
|
||||
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
free_cpumask_var(cfd->cpumask);
|
||||
free_percpu(cfd->csd);
|
||||
break;
|
||||
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
/*
|
||||
* The IPIs for the smp-call-function callbacks queued by other
|
||||
* CPUs might arrive late, either due to hardware latencies or
|
||||
* because this CPU disabled interrupts (inside stop-machine)
|
||||
* before the IPIs were sent. So flush out any pending callbacks
|
||||
* explicitly (without waiting for the IPIs to arrive), to
|
||||
* ensure that the outgoing CPU doesn't go offline with work
|
||||
* still pending.
|
||||
*/
|
||||
flush_smp_call_function_queue(false);
|
||||
break;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoked by arch to handle an IPI for call function single. Must be
|
||||
* called from the arch with interrupts disabled.
|
||||
/**
|
||||
* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
|
||||
*
|
||||
* Invoked by arch to handle an IPI for call function single.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
void generic_smp_call_function_single_interrupt(void)
|
||||
{
|
||||
/* Pass 'true' so the flush warns if callbacks were queued on an offline CPU. */
flush_smp_call_function_queue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
*
|
||||
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
|
||||
* offline CPU. Skip this check if set to 'false'.
|
||||
*
|
||||
* Flush any pending smp-call-function callbacks queued on this CPU. This is
|
||||
* invoked by the generic IPI handler, as well as by a CPU about to go offline,
|
||||
* to ensure that all pending IPI callbacks are run before it goes completely
|
||||
* offline.
|
||||
*
|
||||
* Loop through the call_single_queue and run all the queued callbacks.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
{
|
||||
struct llist_head *head;
|
||||
struct llist_node *entry;
|
||||
struct call_single_data *csd, *csd_next;
|
||||
static bool warned;
|
||||
|
||||
entry = llist_del_all(&__get_cpu_var(call_single_queue));
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
head = &__get_cpu_var(call_single_queue);
|
||||
entry = llist_del_all(head);
|
||||
entry = llist_reverse_order(entry);
|
||||
|
||||
/*
|
||||
* Shouldn't receive this interrupt on a cpu that is not yet online.
|
||||
*/
|
||||
if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
|
||||
/* There shouldn't be any pending callbacks on an offline CPU. */
|
||||
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
|
||||
!warned && !llist_empty(head))) {
|
||||
warned = true;
|
||||
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
|
||||
|
||||
|
|
|
@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
|
|||
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
|
||||
static int maxolduid = 65535;
|
||||
static int minolduid;
|
||||
static int min_percpu_pagelist_fract = 8;
|
||||
|
||||
static int ngroups_max = NGROUPS_MAX;
|
||||
static const int cap_last_cap = CAP_LAST_CAP;
|
||||
|
@ -861,6 +860,17 @@ static struct ctl_table kern_table[] = {
|
|||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#ifdef CONFIG_SMP
|
||||
{
|
||||
.procname = "softlockup_all_cpu_backtrace",
|
||||
.data = &sysctl_softlockup_all_cpu_backtrace,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#endif /* CONFIG_SMP */
|
||||
{
|
||||
.procname = "nmi_watchdog",
|
||||
.data = &watchdog_user_enabled,
|
||||
|
@ -1317,7 +1327,7 @@ static struct ctl_table vm_table[] = {
|
|||
.maxlen = sizeof(percpu_pagelist_fraction),
|
||||
.mode = 0644,
|
||||
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
|
||||
.extra1 = &min_percpu_pagelist_fract,
|
||||
.extra1 = &zero,
|
||||
},
|
||||
#ifdef CONFIG_MMU
|
||||
{
|
||||
|
|
|
@ -31,6 +31,12 @@
|
|||
|
||||
int watchdog_user_enabled = 1;
|
||||
int __read_mostly watchdog_thresh = 10;
|
||||
#ifdef CONFIG_SMP
|
||||
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
|
||||
#else
|
||||
#define sysctl_softlockup_all_cpu_backtrace 0
|
||||
#endif
|
||||
|
||||
static int __read_mostly watchdog_running;
|
||||
static u64 __read_mostly sample_period;
|
||||
|
||||
|
@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
|||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
|
||||
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
||||
#endif
|
||||
static unsigned long soft_lockup_nmi_warn;
|
||||
|
||||
/* boot commands */
|
||||
/*
|
||||
|
@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
|
|||
}
|
||||
__setup("nosoftlockup", nosoftlockup_setup);
|
||||
/* */
|
||||
#ifdef CONFIG_SMP
|
||||
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	/* Base 0 lets simple_strtol() pick the radix from the string prefix. */
	long requested = simple_strtol(str, NULL, 0);

	/* Any non-zero value enables the feature; store it as 0 or 1. */
	sysctl_softlockup_all_cpu_backtrace = (requested != 0);

	return 1;
}
|
||||
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
|
||||
|
@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
int duration;
|
||||
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
|
||||
|
||||
/* kick the hardlockup detector */
|
||||
watchdog_interrupt_count();
|
||||
|
@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
||||
return HRTIMER_RESTART;
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||
* engaged in dumping cpu back traces
|
||||
*/
|
||||
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
|
||||
/* Someone else will report us. Let's give up */
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||
smp_processor_id(), duration,
|
||||
current->comm, task_pid_nr(current));
|
||||
|
@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
else
|
||||
dump_stack();
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Avoid generating two back traces for current
|
||||
* given that one is already made above
|
||||
*/
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
clear_bit(0, &soft_lockup_nmi_warn);
|
||||
/* Barrier to sync with other cpus */
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
if (softlockup_panic)
|
||||
panic("softlockup: hung tasks");
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
|
@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
|
|||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
preempt_disable();
|
||||
for_each_online_cpu(cpu)
|
||||
update_timers(cpu);
|
||||
preempt_enable();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
|
|
|
@ -930,7 +930,7 @@ config LOCKDEP
|
|||
bool
|
||||
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
|
||||
select STACKTRACE
|
||||
select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC
|
||||
select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE
|
||||
select KALLSYMS
|
||||
select KALLSYMS_ALL
|
||||
|
||||
|
@ -1408,7 +1408,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
|
|||
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
|
||||
depends on !X86_64
|
||||
select STACKTRACE
|
||||
select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
|
||||
select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE
|
||||
help
|
||||
Provide stacktrace filter for fault-injection capabilities
|
||||
|
||||
|
|
|
@ -941,6 +941,37 @@ unlock:
|
|||
spin_unlock(ptl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Save CONFIG_DEBUG_PAGEALLOC from faulting falsely on tail pages
|
||||
* during copy_user_huge_page()'s copy_page_rep(): in the case when
|
||||
* the source page gets split and a tail freed before copy completes.
|
||||
* Called under pmd_lock of checked pmd, so safe from splitting itself.
|
||||
*/
|
||||
static void get_user_huge_page(struct page *page)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
|
||||
struct page *endpage = page + HPAGE_PMD_NR;
|
||||
|
||||
atomic_add(HPAGE_PMD_NR, &page->_count);
|
||||
while (++page < endpage)
|
||||
get_huge_page_tail(page);
|
||||
} else {
|
||||
get_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
static void put_user_huge_page(struct page *page)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
|
||||
struct page *endpage = page + HPAGE_PMD_NR;
|
||||
|
||||
while (page < endpage)
|
||||
put_page(page++);
|
||||
} else {
|
||||
put_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
|
@ -1074,7 +1105,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
ret |= VM_FAULT_WRITE;
|
||||
goto out_unlock;
|
||||
}
|
||||
get_page(page);
|
||||
get_user_huge_page(page);
|
||||
spin_unlock(ptl);
|
||||
alloc:
|
||||
if (transparent_hugepage_enabled(vma) &&
|
||||
|
@ -1095,7 +1126,7 @@ alloc:
|
|||
split_huge_page(page);
|
||||
ret |= VM_FAULT_FALLBACK;
|
||||
}
|
||||
put_page(page);
|
||||
put_user_huge_page(page);
|
||||
}
|
||||
count_vm_event(THP_FAULT_FALLBACK);
|
||||
goto out;
|
||||
|
@ -1105,7 +1136,7 @@ alloc:
|
|||
put_page(new_page);
|
||||
if (page) {
|
||||
split_huge_page(page);
|
||||
put_page(page);
|
||||
put_user_huge_page(page);
|
||||
} else
|
||||
split_huge_page_pmd(vma, address, pmd);
|
||||
ret |= VM_FAULT_FALLBACK;
|
||||
|
@ -1127,7 +1158,7 @@ alloc:
|
|||
|
||||
spin_lock(ptl);
|
||||
if (page)
|
||||
put_page(page);
|
||||
put_user_huge_page(page);
|
||||
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
|
||||
spin_unlock(ptl);
|
||||
mem_cgroup_uncharge_page(new_page);
|
||||
|
@ -2392,8 +2423,6 @@ static void collapse_huge_page(struct mm_struct *mm,
|
|||
pmd = mm_find_pmd(mm, address);
|
||||
if (!pmd)
|
||||
goto out;
|
||||
if (pmd_trans_huge(*pmd))
|
||||
goto out;
|
||||
|
||||
anon_vma_lock_write(vma->anon_vma);
|
||||
|
||||
|
@ -2492,8 +2521,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
|
|||
pmd = mm_find_pmd(mm, address);
|
||||
if (!pmd)
|
||||
goto out;
|
||||
if (pmd_trans_huge(*pmd))
|
||||
goto out;
|
||||
|
||||
memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
|
||||
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
|
||||
|
@ -2846,12 +2873,22 @@ void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
|
|||
static void split_huge_page_address(struct mm_struct *mm,
|
||||
unsigned long address)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));
|
||||
|
||||
pmd = mm_find_pmd(mm, address);
|
||||
if (!pmd)
|
||||
pgd = pgd_offset(mm, address);
|
||||
if (!pgd_present(*pgd))
|
||||
return;
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
if (!pud_present(*pud))
|
||||
return;
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
if (!pmd_present(*pmd))
|
||||
return;
|
||||
/*
|
||||
* Caller holds the mmap_sem write mode, so a huge pmd cannot
|
||||
|
|
71
mm/hugetlb.c
71
mm/hugetlb.c
|
@ -2520,6 +2520,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
|
|||
update_mmu_cache(vma, address, ptep);
|
||||
}
|
||||
|
||||
static int is_hugetlb_entry_migration(pte_t pte)
{
	swp_entry_t entry;

	/* Empty and present entries are never reported as migration entries. */
	if (huge_pte_none(pte) || pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);

	/* Returns 1 only for a non-swap entry that is a migration entry. */
	return (non_swap_entry(entry) && is_migration_entry(entry)) ? 1 : 0;
}
|
||||
|
||||
static int is_hugetlb_entry_hwpoisoned(pte_t pte)
{
	swp_entry_t entry;

	/* Empty and present entries are never reported as hwpoison entries. */
	if (huge_pte_none(pte) || pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);

	/* Returns 1 only for a non-swap entry that is a hwpoison entry. */
	return (non_swap_entry(entry) && is_hwpoison_entry(entry)) ? 1 : 0;
}
|
||||
|
||||
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
struct vm_area_struct *vma)
|
||||
|
@ -2559,10 +2584,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||
dst_ptl = huge_pte_lock(h, dst, dst_pte);
|
||||
src_ptl = huge_pte_lockptr(h, src, src_pte);
|
||||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
if (!huge_pte_none(huge_ptep_get(src_pte))) {
|
||||
entry = huge_ptep_get(src_pte);
|
||||
if (huge_pte_none(entry)) { /* skip none entry */
|
||||
;
|
||||
} else if (unlikely(is_hugetlb_entry_migration(entry) ||
|
||||
is_hugetlb_entry_hwpoisoned(entry))) {
|
||||
swp_entry_t swp_entry = pte_to_swp_entry(entry);
|
||||
|
||||
if (is_write_migration_entry(swp_entry) && cow) {
|
||||
/*
|
||||
* COW mappings require pages in both
|
||||
* parent and child to be set to read.
|
||||
*/
|
||||
make_migration_entry_read(&swp_entry);
|
||||
entry = swp_entry_to_pte(swp_entry);
|
||||
set_huge_pte_at(src, addr, src_pte, entry);
|
||||
}
|
||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||
} else {
|
||||
if (cow)
|
||||
huge_ptep_set_wrprotect(src, addr, src_pte);
|
||||
entry = huge_ptep_get(src_pte);
|
||||
ptepage = pte_page(entry);
|
||||
get_page(ptepage);
|
||||
page_dup_rmap(ptepage);
|
||||
|
@ -2578,32 +2619,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int is_hugetlb_entry_migration(pte_t pte)
{
	swp_entry_t entry;

	/* Empty and present entries are never reported as migration entries. */
	if (huge_pte_none(pte) || pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);

	/* Returns 1 only for a non-swap entry that is a migration entry. */
	return (non_swap_entry(entry) && is_migration_entry(entry)) ? 1 : 0;
}
|
||||
|
||||
static int is_hugetlb_entry_hwpoisoned(pte_t pte)
{
	swp_entry_t entry;

	/* Empty and present entries are never reported as hwpoison entries. */
	if (huge_pte_none(pte) || pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);

	/* Returns 1 only for a non-swap entry that is a hwpoison entry. */
	return (non_swap_entry(entry) && is_hwpoison_entry(entry)) ? 1 : 0;
}
|
||||
|
||||
void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
struct page *ref_page)
|
||||
|
|
1
mm/ksm.c
1
mm/ksm.c
|
@ -945,7 +945,6 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
|
|||
pmd = mm_find_pmd(mm, addr);
|
||||
if (!pmd)
|
||||
goto out;
|
||||
BUG_ON(pmd_trans_huge(*pmd));
|
||||
|
||||
mmun_start = addr;
|
||||
mmun_end = addr + PAGE_SIZE;
|
||||
|
|
|
@ -656,19 +656,18 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
|
|||
* @nodes and @flags,) it's isolated and queued to the pagelist which is
|
||||
* passed via @private.)
|
||||
*/
|
||||
static struct vm_area_struct *
|
||||
static int
|
||||
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
|
||||
const nodemask_t *nodes, unsigned long flags, void *private)
|
||||
{
|
||||
int err;
|
||||
struct vm_area_struct *first, *vma, *prev;
|
||||
int err = 0;
|
||||
struct vm_area_struct *vma, *prev;
|
||||
|
||||
|
||||
first = find_vma(mm, start);
|
||||
if (!first)
|
||||
return ERR_PTR(-EFAULT);
|
||||
vma = find_vma(mm, start);
|
||||
if (!vma)
|
||||
return -EFAULT;
|
||||
prev = NULL;
|
||||
for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
|
||||
for (; vma && vma->vm_start < end; vma = vma->vm_next) {
|
||||
unsigned long endvma = vma->vm_end;
|
||||
|
||||
if (endvma > end)
|
||||
|
@ -678,9 +677,9 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
|
|||
|
||||
if (!(flags & MPOL_MF_DISCONTIG_OK)) {
|
||||
if (!vma->vm_next && vma->vm_end < end)
|
||||
return ERR_PTR(-EFAULT);
|
||||
return -EFAULT;
|
||||
if (prev && prev->vm_end < vma->vm_start)
|
||||
return ERR_PTR(-EFAULT);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (flags & MPOL_MF_LAZY) {
|
||||
|
@ -694,15 +693,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
|
|||
|
||||
err = queue_pages_pgd_range(vma, start, endvma, nodes,
|
||||
flags, private);
|
||||
if (err) {
|
||||
first = ERR_PTR(err);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
}
|
||||
next:
|
||||
prev = vma;
|
||||
}
|
||||
return first;
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1156,16 +1153,17 @@ out:
|
|||
|
||||
/*
|
||||
* Allocate a new page for page migration based on vma policy.
|
||||
* Start assuming that page is mapped by vma pointed to by @private.
|
||||
* Start by assuming the page is mapped by the same vma as contains @start.
|
||||
* Search forward from there, if not. N.B., this assumes that the
|
||||
* list of pages handed to migrate_pages()--which is how we get here--
|
||||
* is in virtual address order.
|
||||
*/
|
||||
static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
|
||||
static struct page *new_page(struct page *page, unsigned long start, int **x)
|
||||
{
|
||||
struct vm_area_struct *vma = (struct vm_area_struct *)private;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long uninitialized_var(address);
|
||||
|
||||
vma = find_vma(current->mm, start);
|
||||
while (vma) {
|
||||
address = page_address_in_vma(page, vma);
|
||||
if (address != -EFAULT)
|
||||
|
@ -1195,7 +1193,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
|
|||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
|
||||
static struct page *new_page(struct page *page, unsigned long start, int **x)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1205,7 +1203,6 @@ static long do_mbind(unsigned long start, unsigned long len,
|
|||
unsigned short mode, unsigned short mode_flags,
|
||||
nodemask_t *nmask, unsigned long flags)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct mempolicy *new;
|
||||
unsigned long end;
|
||||
|
@ -1271,11 +1268,9 @@ static long do_mbind(unsigned long start, unsigned long len,
|
|||
if (err)
|
||||
goto mpol_out;
|
||||
|
||||
vma = queue_pages_range(mm, start, end, nmask,
|
||||
err = queue_pages_range(mm, start, end, nmask,
|
||||
flags | MPOL_MF_INVERT, &pagelist);
|
||||
|
||||
err = PTR_ERR(vma); /* maybe ... */
|
||||
if (!IS_ERR(vma))
|
||||
if (!err)
|
||||
err = mbind_range(mm, start, end, new);
|
||||
|
||||
if (!err) {
|
||||
|
@ -1283,9 +1278,8 @@ static long do_mbind(unsigned long start, unsigned long len,
|
|||
|
||||
if (!list_empty(&pagelist)) {
|
||||
WARN_ON_ONCE(flags & MPOL_MF_LAZY);
|
||||
nr_failed = migrate_pages(&pagelist, new_vma_page,
|
||||
NULL, (unsigned long)vma,
|
||||
MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
|
||||
nr_failed = migrate_pages(&pagelist, new_page, NULL,
|
||||
start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
|
||||
if (nr_failed)
|
||||
putback_movable_pages(&pagelist);
|
||||
}
|
||||
|
|
|
@ -120,8 +120,6 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
|
|||
pmd = mm_find_pmd(mm, addr);
|
||||
if (!pmd)
|
||||
goto out;
|
||||
if (pmd_trans_huge(*pmd))
|
||||
goto out;
|
||||
|
||||
ptep = pte_offset_map(pmd, addr);
|
||||
|
||||
|
|
|
@ -786,7 +786,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
|
|||
for (i = 0; i < VMACACHE_SIZE; i++) {
|
||||
/* if the vma is cached, invalidate the entire cache */
|
||||
if (curr->vmacache[i] == vma) {
|
||||
vmacache_invalidate(curr->mm);
|
||||
vmacache_invalidate(mm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,6 +69,7 @@
|
|||
|
||||
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
|
||||
static DEFINE_MUTEX(pcp_batch_high_lock);
|
||||
#define MIN_PERCPU_PAGELIST_FRACTION (8)
|
||||
|
||||
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
|
||||
DEFINE_PER_CPU(int, numa_node);
|
||||
|
@ -4145,7 +4146,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
|
|||
memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
|
||||
#endif
|
||||
|
||||
static int __meminit zone_batchsize(struct zone *zone)
|
||||
static int zone_batchsize(struct zone *zone)
|
||||
{
|
||||
#ifdef CONFIG_MMU
|
||||
int batch;
|
||||
|
@ -4261,8 +4262,8 @@ static void pageset_set_high(struct per_cpu_pageset *p,
|
|||
pageset_update(&p->pcp, high, batch);
|
||||
}
|
||||
|
||||
static void __meminit pageset_set_high_and_batch(struct zone *zone,
|
||||
struct per_cpu_pageset *pcp)
|
||||
static void pageset_set_high_and_batch(struct zone *zone,
|
||||
struct per_cpu_pageset *pcp)
|
||||
{
|
||||
if (percpu_pagelist_fraction)
|
||||
pageset_set_high(pcp,
|
||||
|
@ -5881,23 +5882,38 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
|
|||
void __user *buffer, size_t *length, loff_t *ppos)
|
||||
{
|
||||
struct zone *zone;
|
||||
unsigned int cpu;
|
||||
int old_percpu_pagelist_fraction;
|
||||
int ret;
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
||||
if (!write || (ret < 0))
|
||||
return ret;
|
||||
|
||||
mutex_lock(&pcp_batch_high_lock);
|
||||
for_each_populated_zone(zone) {
|
||||
unsigned long high;
|
||||
high = zone->managed_pages / percpu_pagelist_fraction;
|
||||
for_each_possible_cpu(cpu)
|
||||
pageset_set_high(per_cpu_ptr(zone->pageset, cpu),
|
||||
high);
|
||||
old_percpu_pagelist_fraction = percpu_pagelist_fraction;
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
||||
if (!write || ret < 0)
|
||||
goto out;
|
||||
|
||||
/* Sanity checking to avoid pcp imbalance */
|
||||
if (percpu_pagelist_fraction &&
|
||||
percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) {
|
||||
percpu_pagelist_fraction = old_percpu_pagelist_fraction;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* No change? */
|
||||
if (percpu_pagelist_fraction == old_percpu_pagelist_fraction)
|
||||
goto out;
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
unsigned int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
pageset_set_high_and_batch(zone,
|
||||
per_cpu_ptr(zone->pageset, cpu));
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&pcp_batch_high_lock);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hashdist = HASHDIST_DEFAULT;
|
||||
|
|
12
mm/rmap.c
12
mm/rmap.c
|
@ -569,6 +569,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
|
|||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd = NULL;
|
||||
pmd_t pmde;
|
||||
|
||||
pgd = pgd_offset(mm, address);
|
||||
if (!pgd_present(*pgd))
|
||||
|
@ -579,7 +580,13 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
|
|||
goto out;
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
if (!pmd_present(*pmd))
|
||||
/*
|
||||
* Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at()
|
||||
* without holding anon_vma lock for write. So when looking for a
|
||||
* genuine pmde (in which to find pte), test present and !THP together.
|
||||
*/
|
||||
pmde = ACCESS_ONCE(*pmd);
|
||||
if (!pmd_present(pmde) || pmd_trans_huge(pmde))
|
||||
pmd = NULL;
|
||||
out:
|
||||
return pmd;
|
||||
|
@ -615,9 +622,6 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
|
|||
if (!pmd)
|
||||
return NULL;
|
||||
|
||||
if (pmd_trans_huge(*pmd))
|
||||
return NULL;
|
||||
|
||||
pte = pte_offset_map(pmd, address);
|
||||
/* Make a quick check before getting the lock */
|
||||
if (!sync && !pte_present(*pte)) {
|
||||
|
|
59
mm/shmem.c
59
mm/shmem.c
|
@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt;
|
|||
#define SHORT_SYMLINK_LEN 128
|
||||
|
||||
/*
|
||||
* shmem_fallocate and shmem_writepage communicate via inode->i_private
|
||||
* (with i_mutex making sure that it has only one user at a time):
|
||||
* we would prefer not to enlarge the shmem inode just for that.
|
||||
* shmem_fallocate communicates with shmem_fault or shmem_writepage via
|
||||
* inode->i_private (with i_mutex making sure that it has only one user at
|
||||
* a time): we would prefer not to enlarge the shmem inode just for that.
|
||||
*/
|
||||
struct shmem_falloc {
|
||||
int mode; /* FALLOC_FL mode currently operating */
|
||||
pgoff_t start; /* start of range currently being fallocated */
|
||||
pgoff_t next; /* the next page offset to be fallocated */
|
||||
pgoff_t nr_falloced; /* how many new pages have been fallocated */
|
||||
|
@ -759,6 +760,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
|
|||
spin_lock(&inode->i_lock);
|
||||
shmem_falloc = inode->i_private;
|
||||
if (shmem_falloc &&
|
||||
!shmem_falloc->mode &&
|
||||
index >= shmem_falloc->start &&
|
||||
index < shmem_falloc->next)
|
||||
shmem_falloc->nr_unswapped++;
|
||||
|
@ -1233,6 +1235,44 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
int error;
|
||||
int ret = VM_FAULT_LOCKED;
|
||||
|
||||
/*
|
||||
* Trinity finds that probing a hole which tmpfs is punching can
|
||||
* prevent the hole-punch from ever completing: which in turn
|
||||
* locks writers out with its hold on i_mutex. So refrain from
|
||||
* faulting pages into the hole while it's being punched, and
|
||||
* wait on i_mutex to be released if vmf->flags permits.
|
||||
*/
|
||||
if (unlikely(inode->i_private)) {
|
||||
struct shmem_falloc *shmem_falloc;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
shmem_falloc = inode->i_private;
|
||||
if (!shmem_falloc ||
|
||||
shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE ||
|
||||
vmf->pgoff < shmem_falloc->start ||
|
||||
vmf->pgoff >= shmem_falloc->next)
|
||||
shmem_falloc = NULL;
|
||||
spin_unlock(&inode->i_lock);
|
||||
/*
|
||||
* i_lock has protected us from taking shmem_falloc seriously
|
||||
* once return from shmem_fallocate() went back up that stack.
|
||||
* i_lock does not serialize with i_mutex at all, but it does
|
||||
* not matter if sometimes we wait unnecessarily, or sometimes
|
||||
* miss out on waiting: we just need to make those cases rare.
|
||||
*/
|
||||
if (shmem_falloc) {
|
||||
if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
|
||||
!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
|
||||
up_read(&vma->vm_mm->mmap_sem);
|
||||
mutex_lock(&inode->i_mutex);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return VM_FAULT_RETRY;
|
||||
}
|
||||
/* cond_resched? Leave that to GUP or return to user */
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
}
|
||||
|
||||
error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
|
||||
if (error)
|
||||
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
|
||||
|
@ -1724,20 +1764,31 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
|
|||
pgoff_t start, index, end;
|
||||
int error;
|
||||
|
||||
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE;
|
||||
|
||||
if (mode & FALLOC_FL_PUNCH_HOLE) {
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
loff_t unmap_start = round_up(offset, PAGE_SIZE);
|
||||
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
|
||||
|
||||
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
|
||||
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_private = &shmem_falloc;
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
if ((u64)unmap_end > (u64)unmap_start)
|
||||
unmap_mapping_range(mapping, unmap_start,
|
||||
1 + unmap_end - unmap_start, 0);
|
||||
shmem_truncate_range(inode, offset, offset + len - 1);
|
||||
/* No need to unmap again: hole-punching leaves COWed pages */
|
||||
error = 0;
|
||||
goto out;
|
||||
goto undone;
|
||||
}
|
||||
|
||||
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
|
||||
|
|
90
mm/slab.c
90
mm/slab.c
|
@ -386,6 +386,39 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
|
|||
|
||||
#endif
|
||||
|
||||
#define OBJECT_FREE (0)
|
||||
#define OBJECT_ACTIVE (1)
|
||||
|
||||
#ifdef CONFIG_DEBUG_SLAB_LEAK
|
||||
|
||||
static void set_obj_status(struct page *page, int idx, int val)
|
||||
{
|
||||
int freelist_size;
|
||||
char *status;
|
||||
struct kmem_cache *cachep = page->slab_cache;
|
||||
|
||||
freelist_size = cachep->num * sizeof(freelist_idx_t);
|
||||
status = (char *)page->freelist + freelist_size;
|
||||
status[idx] = val;
|
||||
}
|
||||
|
||||
static inline unsigned int get_obj_status(struct page *page, int idx)
|
||||
{
|
||||
int freelist_size;
|
||||
char *status;
|
||||
struct kmem_cache *cachep = page->slab_cache;
|
||||
|
||||
freelist_size = cachep->num * sizeof(freelist_idx_t);
|
||||
status = (char *)page->freelist + freelist_size;
|
||||
|
||||
return status[idx];
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void set_obj_status(struct page *page, int idx, int val) {}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not go above this order unless 0 objects fit into the slab or
|
||||
* overridden on the command line.
|
||||
|
@ -576,12 +609,30 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
|
|||
return cachep->array[smp_processor_id()];
|
||||
}
|
||||
|
||||
static size_t calculate_freelist_size(int nr_objs, size_t align)
|
||||
{
|
||||
size_t freelist_size;
|
||||
|
||||
freelist_size = nr_objs * sizeof(freelist_idx_t);
|
||||
if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
|
||||
freelist_size += nr_objs * sizeof(char);
|
||||
|
||||
if (align)
|
||||
freelist_size = ALIGN(freelist_size, align);
|
||||
|
||||
return freelist_size;
|
||||
}
|
||||
|
||||
static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
|
||||
size_t idx_size, size_t align)
|
||||
{
|
||||
int nr_objs;
|
||||
size_t remained_size;
|
||||
size_t freelist_size;
|
||||
int extra_space = 0;
|
||||
|
||||
if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
|
||||
extra_space = sizeof(char);
|
||||
/*
|
||||
* Ignore padding for the initial guess. The padding
|
||||
* is at most @align-1 bytes, and @buffer_size is at
|
||||
|
@ -590,14 +641,15 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
|
|||
* into the memory allocation when taking the padding
|
||||
* into account.
|
||||
*/
|
||||
nr_objs = slab_size / (buffer_size + idx_size);
|
||||
nr_objs = slab_size / (buffer_size + idx_size + extra_space);
|
||||
|
||||
/*
|
||||
* This calculated number will be either the right
|
||||
* amount, or one greater than what we want.
|
||||
*/
|
||||
freelist_size = slab_size - nr_objs * buffer_size;
|
||||
if (freelist_size < ALIGN(nr_objs * idx_size, align))
|
||||
remained_size = slab_size - nr_objs * buffer_size;
|
||||
freelist_size = calculate_freelist_size(nr_objs, align);
|
||||
if (remained_size < freelist_size)
|
||||
nr_objs--;
|
||||
|
||||
return nr_objs;
|
||||
|
@ -635,7 +687,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
|
|||
} else {
|
||||
nr_objs = calculate_nr_objs(slab_size, buffer_size,
|
||||
sizeof(freelist_idx_t), align);
|
||||
mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align);
|
||||
mgmt_size = calculate_freelist_size(nr_objs, align);
|
||||
}
|
||||
*num = nr_objs;
|
||||
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
|
||||
|
@ -2041,13 +2093,16 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
|
|||
break;
|
||||
|
||||
if (flags & CFLGS_OFF_SLAB) {
|
||||
size_t freelist_size_per_obj = sizeof(freelist_idx_t);
|
||||
/*
|
||||
* Max number of objs-per-slab for caches which
|
||||
* use off-slab slabs. Needed to avoid a possible
|
||||
* looping condition in cache_grow().
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
|
||||
freelist_size_per_obj += sizeof(char);
|
||||
offslab_limit = size;
|
||||
offslab_limit /= sizeof(freelist_idx_t);
|
||||
offslab_limit /= freelist_size_per_obj;
|
||||
|
||||
if (num > offslab_limit)
|
||||
break;
|
||||
|
@ -2294,8 +2349,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
if (!cachep->num)
|
||||
return -E2BIG;
|
||||
|
||||
freelist_size =
|
||||
ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align);
|
||||
freelist_size = calculate_freelist_size(cachep->num, cachep->align);
|
||||
|
||||
/*
|
||||
* If the slab has been placed off-slab, and we have enough space then
|
||||
|
@ -2308,7 +2362,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
|
||||
if (flags & CFLGS_OFF_SLAB) {
|
||||
/* really off slab. No need for manual alignment */
|
||||
freelist_size = cachep->num * sizeof(freelist_idx_t);
|
||||
freelist_size = calculate_freelist_size(cachep->num, 0);
|
||||
|
||||
#ifdef CONFIG_PAGE_POISONING
|
||||
/* If we're going to use the generic kernel_map_pages()
|
||||
|
@ -2612,6 +2666,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
|
|||
if (cachep->ctor)
|
||||
cachep->ctor(objp);
|
||||
#endif
|
||||
set_obj_status(page, i, OBJECT_FREE);
|
||||
set_free_obj(page, i, i);
|
||||
}
|
||||
}
|
||||
|
@ -2820,6 +2875,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
|
|||
BUG_ON(objnr >= cachep->num);
|
||||
BUG_ON(objp != index_to_obj(cachep, page, objnr));
|
||||
|
||||
set_obj_status(page, objnr, OBJECT_FREE);
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
|
||||
|
@ -2953,6 +3009,8 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
|
|||
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
||||
gfp_t flags, void *objp, unsigned long caller)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
if (!objp)
|
||||
return objp;
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
|
@ -2983,6 +3041,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
|||
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
|
||||
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
|
||||
}
|
||||
|
||||
page = virt_to_head_page(objp);
|
||||
set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
|
||||
objp += obj_offset(cachep);
|
||||
if (cachep->ctor && cachep->flags & SLAB_POISON)
|
||||
cachep->ctor(objp);
|
||||
|
@ -4219,21 +4280,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c,
|
|||
struct page *page)
|
||||
{
|
||||
void *p;
|
||||
int i, j;
|
||||
int i;
|
||||
|
||||
if (n[0] == n[1])
|
||||
return;
|
||||
for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
|
||||
bool active = true;
|
||||
|
||||
for (j = page->active; j < c->num; j++) {
|
||||
/* Skip freed item */
|
||||
if (get_free_obj(page, j) == i) {
|
||||
active = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!active)
|
||||
if (get_obj_status(page, i) != OBJECT_ACTIVE)
|
||||
continue;
|
||||
|
||||
if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
|
||||
|
|
|
@ -3476,12 +3476,17 @@ sub process {
|
|||
}
|
||||
}
|
||||
|
||||
# unnecessary return in a void function? (a single leading tab, then return;)
|
||||
if ($sline =~ /^\+\treturn\s*;\s*$/ &&
|
||||
$prevline =~ /^\+/) {
|
||||
# unnecessary return in a void function
|
||||
# at end-of-function, with the previous line a single leading tab, then return;
|
||||
# and the line before that not a goto label target like "out:"
|
||||
if ($sline =~ /^[ \+]}\s*$/ &&
|
||||
$prevline =~ /^\+\treturn\s*;\s*$/ &&
|
||||
$linenr >= 3 &&
|
||||
$lines[$linenr - 3] =~ /^[ +]/ &&
|
||||
$lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) {
|
||||
WARN("RETURN_VOID",
|
||||
"void function return statements are not generally useful\n" . $herecurr);
|
||||
}
|
||||
"void function return statements are not generally useful\n" . $hereprev);
|
||||
}
|
||||
|
||||
# if statements using unnecessary parentheses - ie: if ((foo == bar))
|
||||
if ($^V && $^V ge 5.10.0 &&
|
||||
|
|
Loading…
Reference in New Issue