Merge branch 'locking/urgent' into locking/core, to pick up dependent fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 2016-12-02 11:13:44 +01:00
commit 1b95b1a06c
38 changed files with 367 additions and 150 deletions

View File

@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num)
*/
unsigned long long sched_clock(void)
{
return clocksource_cyc2ns(get_cycles(),
sched_clock_mult, SCHED_CLOCK_SHIFT);
return mult_frac(get_cycles(),
sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT);
}
int setup_profiling_timer(unsigned int multiplier)
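
The hunk above swaps the clocksource helper for the generic mult_frac() macro, which computes (cycles * mult) / 2^shift while keeping the intermediate product from overflowing. A small user-space sketch of the same splitting trick (values are hypothetical and this is not the kernel's exact macro):

#include <stdint.h>
#include <stdio.h>

/* Same idea as mult_frac(x, numer, denom): split x into whole multiples of
 * denom plus a remainder so neither partial product overflows as easily as
 * a direct x * numer would. */
static uint64_t mult_frac_sketch(uint64_t x, uint64_t numer, uint64_t denom)
{
	uint64_t quot = x / denom;
	uint64_t rem  = x % denom;

	return quot * numer + (rem * numer) / denom;
}

int main(void)
{
	/* hypothetical cycle count, multiplier and shift */
	uint64_t cycles = 123456789ULL;
	uint64_t mult   = 10;
	unsigned shift  = 3;

	printf("%llu\n",
	       (unsigned long long)mult_frac_sketch(cycles, mult, 1ULL << shift));
	return 0;
}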

View File

@ -40,8 +40,8 @@ GCOV_PROFILE := n
UBSAN_SANITIZE :=n
LDFLAGS := -m elf_$(UTS_MACHINE)
ifeq ($(CONFIG_RELOCATABLE),y)
# If kernel is relocatable, build compressed kernel as PIE.
# Compressed kernel should be built as PIE since it may be loaded at any
# address by the bootloader.
ifeq ($(CONFIG_X86_32),y)
LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
else
@ -51,7 +51,6 @@ else
LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
endif
endif
LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy

View File

@ -87,6 +87,12 @@ int validate_cpu(void)
return -1;
}
if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
!has_eflag(X86_EFLAGS_ID)) {
printf("This kernel requires a CPU with the CPUID instruction. Build with CONFIG_M486=y to run on this CPU.\n");
return -1;
}
if (err_flags) {
puts("This kernel requires the following features "
"not present on the CPU:\n");

View File

@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void)
pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
break;
case 0x17:
pr_cont("Fam17h ");
/*
* In family 17h, there are no event constraints in the PMC hardware.
* We fall back to using the default amd_get_event_constraints.
*/
break;
default:
pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;

View File

@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
frame.next_frame = 0;
frame.return_address = 0;
if (!access_ok(VERIFY_READ, fp, 8))
if (!valid_user_frame(fp, sizeof(frame)))
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))
break;
perf_callchain_store(entry, cs_base + frame.return_address);
fp = compat_ptr(ss_base + frame.next_frame);
}
@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
frame.next_frame = NULL;
frame.return_address = 0;
if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
if (!valid_user_frame(fp, sizeof(frame)))
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))
break;
perf_callchain_store(entry, frame.return_address);
fp = (void __user *)frame.next_frame;
}
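
Both loops now reject the frame pointer with valid_user_frame() before dereferencing it. Conceptually that helper is a user-address range check; a hedged, self-contained sketch of the idea (the limit constant is a placeholder; the real helper is built on __range_not_ok() and TASK_SIZE):

#include <stdint.h>
#include <stddef.h>

#define USER_LIMIT_SKETCH 0x7ffffffff000UL	/* placeholder for TASK_SIZE */

/* A frame is acceptable only if [fp, fp + size) lies entirely below the
 * user-space limit and does not wrap around. */
static inline int valid_user_frame_sketch(uintptr_t fp, size_t size)
{
	return fp + size > fp && fp + size <= USER_LIMIT_SKETCH;
}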

View File

@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
}
/*
* We use the interrupt regs as a base because the PEBS record
* does not contain a full regs set, specifically it seems to
* lack segment descriptors, which get used by things like
* user_mode().
* We use the interrupt regs as a base because the PEBS record does not
* contain a full regs set, specifically it seems to lack segment
* descriptors, which get used by things like user_mode().
*
* In the simple case fix up only the IP and BP,SP regs, for
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
* In the simple case fix up only the IP for PERF_SAMPLE_IP.
*
* We must however always use BP,SP from iregs for the unwinder to stay
* sane; the record BP,SP can point into thin air when the record is
* from a previous PMI context or an (I)RET happened between the record
* and PMI.
*/
*regs = *iregs;
regs->flags = pebs->flags;
set_linear_ip(regs, pebs->ip);
regs->bp = pebs->bp;
regs->sp = pebs->sp;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs->ax = pebs->ax;
@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->dx = pebs->dx;
regs->si = pebs->si;
regs->di = pebs->di;
regs->bp = pebs->bp;
regs->sp = pebs->sp;
regs->flags = pebs->flags;
/*
* Per the above; only set BP,SP if we don't need callchains.
*
* XXX: does this make sense?
*/
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
regs->bp = pebs->bp;
regs->sp = pebs->sp;
}
/*
* Preserve PERF_EFLAGS_VM from set_linear_ip().
*/
regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
regs->r8 = pebs->r8;
regs->r9 = pebs->r9;

View File

@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
*/
static int uncore_pmu_event_init(struct perf_event *event);
static bool is_uncore_event(struct perf_event *event)
static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
{
return event->pmu->event_init == uncore_pmu_event_init;
return &box->pmu->pmu == event->pmu;
}
static int
@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
n = box->n_events;
if (is_uncore_event(leader)) {
if (is_box_event(box, leader)) {
box->event_list[n] = leader;
n++;
}
@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
return n;
list_for_each_entry(event, &leader->sibling_list, group_entry) {
if (!is_uncore_event(event) ||
if (!is_box_event(box, event) ||
event->state <= PERF_EVENT_STATE_OFF)
continue;

View File

@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
snb_uncore_imc_event_start(event, 0);
box->n_events++;
return 0;
}
static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int i;
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
--box->n_events;
break;
}
}
}
int snb_pci2phy_map_init(int devid)

View File

@ -113,7 +113,7 @@ struct debug_store {
* Per register state.
*/
struct er_account {
raw_spinlock_t lock; /* per-core: protect structure */
raw_spinlock_t lock; /* per-core: protect structure */
u64 config; /* extra MSR config */
u64 reg; /* extra MSR number */
atomic_t ref; /* reference count */

View File

@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
for (; stack < stack_info.end; stack++) {
unsigned long real_addr;
int reliable = 0;
unsigned long addr = *stack;
unsigned long addr = READ_ONCE_NOCHECK(*stack);
unsigned long *ret_addr_p =
unwind_get_return_address_ptr(&state);

View File

@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
/* FPU state will be reallocated lazily at the first use. */
fpu__drop(fpu);
} else {
if (!fpu->fpstate_active) {
fpu__activate_curr(fpu);
user_fpu_begin();
}
fpu__drop(fpu);
/*
* Make sure fpstate is cleared and initialized.
*/
if (static_cpu_has(X86_FEATURE_FPU)) {
fpu__activate_curr(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
}
}

View File

@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS
initial_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(initial_page_table)
.globl initial_page_table
initial_page_table:
.fill 1024,4,0
#endif
initial_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.globl empty_zero_page
empty_zero_page:
.fill 4096,1,0
ENTRY(swapper_pg_dir)
.globl swapper_pg_dir
swapper_pg_dir:
.fill 1024,4,0
EXPORT_SYMBOL(empty_zero_page)

View File

@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si,
{
struct platform_device *pd;
struct resource res;
unsigned long len;
u64 base, size;
u32 length;
/* don't use lfb_size as it may contain the whole VMEM instead of only
* the part that is occupied by the framebuffer */
len = mode->height * mode->stride;
len = PAGE_ALIGN(len);
if (len > (u64)si->lfb_size << 16) {
/*
* If the 64BIT_BASE capability is set, ext_lfb_base will contain the
* upper half of the base address. Assemble the address, then make sure
* it is valid and we can actually access it.
*/
base = si->lfb_base;
if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
base |= (u64)si->ext_lfb_base << 32;
if (!base || (u64)(resource_size_t)base != base) {
printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
return -EINVAL;
}
/*
* Don't use lfb_size as IORESOURCE size, since it may contain the
* entire VMEM, and thus require huge mappings. Use just the part we
* need, that is, the part where the framebuffer is located. But verify
* that it does not exceed the advertised VMEM.
* Note that in case of VBE, the lfb_size is shifted by 16 bits for
* historical reasons.
*/
size = si->lfb_size;
if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
size <<= 16;
length = mode->height * mode->stride;
length = PAGE_ALIGN(length);
if (length > size) {
printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
return -EINVAL;
}
@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si,
memset(&res, 0, sizeof(res));
res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
res.name = simplefb_resname;
res.start = si->lfb_base;
res.end = si->lfb_base + len - 1;
res.start = base;
res.end = res.start + length - 1;
if (res.end <= res.start)
return -EINVAL;
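
A short worked example of the new base handling, using made-up firmware values rather than anything from the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical firmware-reported values */
	uint32_t lfb_base     = 0xd0000000;	/* low 32 bits of the base */
	uint32_t ext_lfb_base = 0x1;		/* high 32 bits, 64BIT_BASE set */

	uint64_t base = (uint64_t)lfb_base | ((uint64_t)ext_lfb_base << 32);

	/* Mirror the (u64)(resource_size_t)base != base test for a kernel
	 * whose resource_size_t is only 32 bits wide. */
	int fits = ((uint64_t)(uint32_t)base == base);

	printf("base = %#llx, representable: %s\n",
	       (unsigned long long)base, fits ? "yes" : "no");
	return 0;
}

With these values base becomes 0x1d0000000, which a 32-bit resource_size_t cannot represent, so the new check returns -EINVAL instead of registering a resource at a truncated address.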

View File

@ -7,11 +7,13 @@
unsigned long unwind_get_return_address(struct unwind_state *state)
{
unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
if (unwind_done(state))
return 0;
return ftrace_graph_ret_addr(state->task, &state->graph_idx,
*state->sp, state->sp);
addr, state->sp);
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state)
return false;
do {
unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
for (state->sp++; state->sp < info->end; state->sp++)
if (__kernel_text_address(*state->sp))
if (__kernel_text_address(addr))
return true;
state->sp = info->next_sp;

View File

@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
if (early_recursion_flag > 2)
goto halt_loop;
if (regs->cs != __KERNEL_CS)
/*
* Old CPUs leave the high bits of CS on the stack
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
*/
if ((regs->cs & 0xFFFF) != __KERNEL_CS)
goto fail;
/*

View File

@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
# MISC Devices
obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o

View File

@ -1,5 +1,5 @@
/*
* platform_wdt.c: Watchdog platform library file
* Intel Merrifield watchdog platform device library file
*
* (C) Copyright 2014 Intel Corporation
* Author: David Cohen <david.a.cohen@linux.intel.com>
@ -14,7 +14,9 @@
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/platform_data/intel-mid_wdt.h>
#include <asm/intel-mid.h>
#include <asm/intel_scu_ipc.h>
#include <asm/io_apic.h>
#define TANGIER_EXT_TIMER0_MSI 15
@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = {
.probe = tangier_probe,
};
static int __init register_mid_wdt(void)
static int wdt_scu_status_change(struct notifier_block *nb,
unsigned long code, void *data)
{
if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
wdt_dev.dev.platform_data = &tangier_pdata;
return platform_device_register(&wdt_dev);
if (code == SCU_DOWN) {
platform_device_unregister(&wdt_dev);
return 0;
}
return -ENODEV;
return platform_device_register(&wdt_dev);
}
static struct notifier_block wdt_scu_notifier = {
.notifier_call = wdt_scu_status_change,
};
static int __init register_mid_wdt(void)
{
if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
return -ENODEV;
wdt_dev.dev.platform_data = &tangier_pdata;
/*
* We need to be sure that the SCU IPC is ready before the watchdog
* device can be registered:
*/
intel_scu_notifier_add(&wdt_scu_notifier);
return 0;
}
rootfs_initcall(register_mid_wdt);

View File

@ -214,7 +214,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
if (!result) {
if (!result && !ctx->more) {
err = af_alg_wait_for_completion(
crypto_ahash_init(&ctx->req),
&ctx->completion);

View File

@ -68,10 +68,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
sg = scatterwalk_ffwd(tmp, sg, start);
if (sg_page(sg) == virt_to_page(buf) &&
sg->offset == offset_in_page(buf))
return;
scatterwalk_start(&walk, sg);
scatterwalk_copychunks(buf, &walk, nbytes, out);
scatterwalk_done(&walk, out, 0);

View File

@ -685,7 +685,7 @@ static void __init berlin2_clock_setup(struct device_node *np)
}
/* register clk-provider */
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
return;

View File

@ -382,7 +382,7 @@ static void __init berlin2q_clock_setup(struct device_node *np)
}
/* register clk-provider */
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
return;

View File

@ -82,6 +82,6 @@ static void __init efm32gg_cmu_init(struct device_node *np)
hws[clk_HFPERCLKDAC0] = clk_hw_register_gate(NULL, "HFPERCLK.DAC0",
"HFXO", 0, base + CMU_HFPERCLKEN0, 17, 0, NULL);
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
}
CLK_OF_DECLARE(efm32ggcmu, "efm32gg,cmu", efm32gg_cmu_init);

View File

@ -191,6 +191,8 @@ static struct clk_div_table axi_div_table[] = {
static SUNXI_CCU_DIV_TABLE(axi_clk, "axi", "cpu",
0x050, 0, 3, axi_div_table, 0);
#define SUN6I_A31_AHB1_REG 0x054
static const char * const ahb1_parents[] = { "osc32k", "osc24M",
"axi", "pll-periph" };
@ -1230,6 +1232,16 @@ static void __init sun6i_a31_ccu_setup(struct device_node *node)
val &= BIT(16);
writel(val, reg + SUN6I_A31_PLL_MIPI_REG);
/* Force AHB1 to PLL6 / 3 */
val = readl(reg + SUN6I_A31_AHB1_REG);
/* set PLL6 pre-div = 3 */
val &= ~GENMASK(7, 6);
val |= 0x2 << 6;
/* select PLL6 / pre-div */
val &= ~GENMASK(13, 12);
val |= 0x3 << 12;
writel(val, reg + SUN6I_A31_AHB1_REG);
sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk,

View File

@ -373,7 +373,7 @@ static void sun4i_get_apb1_factors(struct factors_request *req)
else
calcp = 3;
calcm = (req->parent_rate >> calcp) - 1;
calcm = (div >> calcp) - 1;
req->rate = (req->parent_rate >> calcp) / (calcm + 1);
req->m = calcm;

View File

@ -4010,7 +4010,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
SAM_STAT_CHECK_CONDITION;
}
static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
{
return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
}
/**
* scsih_qcmd - main scsi request entry point
@ -4038,6 +4041,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
if (ioc->logging_level & MPT_DEBUG_SCSI)
scsi_print_command(scmd);
/*
* Lock the device for any subsequent command until command is
* done.
*/
if (ata_12_16_cmd(scmd))
scsi_internal_device_block(scmd->device);
sas_device_priv_data = scmd->device->hostdata;
if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
scmd->result = DID_NO_CONNECT << 16;
@ -4613,6 +4623,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
if (scmd == NULL)
return 1;
if (ata_12_16_cmd(scmd))
scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
if (mpi_reply == NULL) {

View File

@ -1456,15 +1456,20 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
sp = req->outstanding_cmds[cnt];
if (sp) {
/* Get a reference to the sp and drop the lock.
* The reference ensures this sp->done() call
* - and not the call in qla2xxx_eh_abort() -
* ends the SCSI command (with result 'res').
/* Don't abort commands in adapter during EEH
* recovery as it's not accessible/responding.
*/
sp_get(sp);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
qla2xxx_eh_abort(GET_CMD_SP(sp));
spin_lock_irqsave(&ha->hardware_lock, flags);
if (!ha->flags.eeh_busy) {
/* Get a reference to the sp and drop the lock.
* The reference ensures this sp->done() call
* - and not the call in qla2xxx_eh_abort() -
* ends the SCSI command (with result 'res').
*/
sp_get(sp);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
qla2xxx_eh_abort(GET_CMD_SP(sp));
spin_lock_irqsave(&ha->hardware_lock, flags);
}
req->outstanding_cmds[cnt] = NULL;
sp->done(vha, sp, res);
}

View File

@ -669,9 +669,16 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
.set_cur_state = powerclamp_set_cur_state,
};
static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
static int __init powerclamp_probe(void)
{
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
if (!x86_match_cpu(intel_powerclamp_ids)) {
pr_err("CPU does not support MWAIT");
return -ENODEV;
}
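
The probe now matches on an x86_cpu_id table instead of calling boot_cpu_has() directly; together with MODULE_DEVICE_TABLE(x86cpu, ...) this lets userspace autoload the module on CPUs that advertise MWAIT. A hedged sketch of the same pattern as a stand-alone module (names are hypothetical, and the positional initializer follows the struct x86_cpu_id layout of this era):

#include <linux/module.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeatures.h>
#include <asm/processor.h>

static const struct x86_cpu_id example_ids[] __initconst = {
	/* any Intel family/model that advertises MWAIT */
	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
	{}	/* sentinel entry terminating the table */
};
MODULE_DEVICE_TABLE(x86cpu, example_ids);

static int __init example_init(void)
{
	/* x86_match_cpu() returns the matching entry or NULL */
	if (!x86_match_cpu(example_ids))
		return -ENODEV;
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");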

View File

@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
}
ret = -EPROTONOSUPPORT;
if (minorversion == 0)
if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
else if (xprt->ops->bc_up)
ret = xprt->ops->bc_up(serv, net);

View File

@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
}
static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
const nfs4_stateid *stateid)
{
return test_bit(NFS_OPEN_STATE, &state->flags) &&
nfs4_stateid_match_other(&state->open_stateid, stateid);
}
#else
#define nfs4_close_state(a, b) do { } while (0)

View File

@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
}
static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
nfs4_stateid *arg_stateid,
nfs4_stateid *stateid, fmode_t fmode)
{
clear_bit(NFS_O_RDWR_STATE, &state->flags);
@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
}
if (stateid == NULL)
return;
/* Handle races with OPEN */
if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
(nfs4_stateid_match_other(stateid, &state->open_stateid) &&
!nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
/* Handle OPEN+OPEN_DOWNGRADE races */
if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
!nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
nfs_resync_open_stateid_locked(state);
return;
}
@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
nfs4_stateid *stateid, fmode_t fmode)
{
write_seqlock(&state->seqlock);
nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
/* Ignore, if the CLOSE argument doesn't match the current stateid */
if (nfs4_state_match_open_stateid_other(state, arg_stateid))
nfs_clear_open_stateid_locked(state, stateid, fmode);
write_sequnlock(&state->seqlock);
if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
static int nfs41_check_expired_locks(struct nfs4_state *state)
{
int status, ret = NFS_OK;
struct nfs4_lock_state *lsp;
struct nfs4_lock_state *lsp, *prev = NULL;
struct nfs_server *server = NFS_SERVER(state->inode);
if (!test_bit(LK_STATE_IN_USE, &state->flags))
goto out;
spin_lock(&state->state_lock);
list_for_each_entry(lsp, &state->lock_states, ls_locks) {
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
atomic_inc(&lsp->ls_count);
spin_unlock(&state->state_lock);
nfs4_put_lock_state(prev);
prev = lsp;
status = nfs41_test_and_free_expired_stateid(server,
&lsp->ls_stateid,
cred);
@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
} else if (status != NFS_OK) {
ret = status;
break;
nfs4_put_lock_state(prev);
goto out;
}
spin_lock(&state->state_lock);
}
};
}
spin_unlock(&state->state_lock);
nfs4_put_lock_state(prev);
out:
return ret;
}
@ -3122,7 +3134,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
} else if (is_rdwr)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
if (!nfs4_valid_open_stateid(state))
if (!nfs4_valid_open_stateid(state) ||
test_bit(NFS_OPEN_STATE, &state->flags) == 0)
call_close = 0;
spin_unlock(&state->owner->so_lock);
@ -5569,6 +5582,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
@ -5579,8 +5593,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
task->tk_status = 0;
if (data->roc)
pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break;
default:
if (nfs4_async_handle_error(task, data->res.server,
@ -5590,6 +5602,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
}
}
data->rpc_status = task->tk_status;
if (data->roc && data->rpc_status == 0)
pnfs_roc_set_barrier(data->inode, data->roc_barrier);
}
static void nfs4_delegreturn_release(void *calldata)

View File

@ -1547,6 +1547,7 @@ restart:
ssleep(1);
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:

View File

@ -2571,6 +2571,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
extern void sched_autogroup_exit(struct signal_struct *sig);
extern void sched_autogroup_exit_task(struct task_struct *p);
#ifdef CONFIG_PROC_FS
extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
@ -2580,6 +2581,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { }
static inline void sched_autogroup_detach(struct task_struct *p) { }
static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
static inline void sched_autogroup_exit_task(struct task_struct *p) { }
#endif
extern int yield_to(struct task_struct *p, bool preempt);

View File

@ -902,6 +902,17 @@ list_update_cgroup_event(struct perf_event *event,
* this will always be called from the right CPU.
*/
cpuctx = __get_cpu_context(ctx);
/* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */
if (perf_cgroup_from_task(current, ctx) != event->cgrp) {
/*
* We are removing the last cpu event in this context.
* If that event is not active in this cpu, cpuctx->cgrp
* should've been cleared by perf_cgroup_switch.
*/
WARN_ON_ONCE(!add && cpuctx->cgrp);
return;
}
cpuctx->cgrp = add ? event->cgrp : NULL;
}
@ -8018,6 +8029,7 @@ restart:
* if <size> is not specified, the range is treated as a single address.
*/
enum {
IF_ACT_NONE = -1,
IF_ACT_FILTER,
IF_ACT_START,
IF_ACT_STOP,
@ -8041,6 +8053,7 @@ static const match_table_t if_tokens = {
{ IF_SRC_KERNEL, "%u/%u" },
{ IF_SRC_FILEADDR, "%u@%s" },
{ IF_SRC_KERNELADDR, "%u" },
{ IF_ACT_NONE, NULL },
};
/*

View File

@ -836,6 +836,7 @@ void __noreturn do_exit(long code)
*/
perf_event_exit_task(tsk);
sched_autogroup_exit_task(tsk);
cgroup_exit(tsk);
/*

View File

@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
if (!rt_mutex_has_waiters(lock))
clear_rt_mutex_waiters(lock);
unsigned long owner, *p = (unsigned long *) &lock->owner;
if (rt_mutex_has_waiters(lock))
return;
/*
* The rbtree has no waiters enqueued, now make sure that the
* lock->owner still has the waiters bit set, otherwise the
* following can happen:
*
* CPU 0                   CPU 1                        CPU 2
* l->owner=T1
*                         rt_mutex_lock(l)
*                         lock(l->lock)
*                         l->owner = T1 | HAS_WAITERS;
*                         enqueue(T2)
*                         boost()
*                           unlock(l->lock)
*                         block()
*
*                                                      rt_mutex_lock(l)
*                                                      lock(l->lock)
*                                                      l->owner = T1 | HAS_WAITERS;
*                                                      enqueue(T3)
*                                                      boost()
*                                                        unlock(l->lock)
*                                                      block()
*                         signal(->T2)                 signal(->T3)
*                         lock(l->lock)
*                         dequeue(T2)
*                         deboost()
*                           unlock(l->lock)
*                                                      lock(l->lock)
*                                                      dequeue(T3)
*                                                       ==> wait list is empty
*                                                      deboost()
*                                                       unlock(l->lock)
*                         lock(l->lock)
*                         fixup_rt_mutex_waiters()
*                           if (wait_list_empty(l) {
*                             owner = l->owner & ~HAS_WAITERS;
*                             l->owner = owner
*                              ==> l->owner = T1
*                           }
*                                                      lock(l->lock)
* rt_mutex_unlock(l)                                   fixup_rt_mutex_waiters()
*                                                        if (wait_list_empty(l) {
*                                                          owner = l->owner & ~HAS_WAITERS;
* cmpxchg(l->owner, T1, NULL)
*  ===> Success (l->owner = NULL)
*
*                                                          l->owner = owner
*                                                           ==> l->owner = T1
*                                                        }
*
* With the check for the waiter bit in place T3 on CPU2 will not
* overwrite. All tasks fiddling with the waiters bit are
* serialized by l->lock, so nothing else can modify the waiters
* bit. If the bit is set then nothing can change l->owner either
* so the simple RMW is safe. The cmpxchg() will simply fail if it
* happens in the middle of the RMW because the waiters bit is
* still set.
*/
owner = READ_ONCE(*p);
if (owner & RT_MUTEX_HAS_WAITERS)
WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
/*

View File

@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p)
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
return (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
}
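
Both rt_mutex hunks depend on the owner encoding: lock->owner holds the owning task_struct pointer with a low bit reused as the "has waiters" flag (RT_MUTEX_HAS_WAITERS), which is why readers mask with RT_MUTEX_OWNER_MASKALL and why the non-atomic clear needs the extra care above. A stand-alone sketch of that encoding, using assumed local names rather than the kernel's definitions:

#include <stdint.h>
#include <stddef.h>

struct task;					/* stand-in for struct task_struct */

#define HAS_WAITERS_SKETCH 1UL			/* plays the role of RT_MUTEX_HAS_WAITERS */

struct rt_mutex_sketch {
	uintptr_t owner;			/* task pointer | HAS_WAITERS_SKETCH */
};

/* Task structures are word aligned, so the low bit of the pointer is free
 * to carry the waiters flag. */
static inline void set_owner(struct rt_mutex_sketch *l, struct task *t, int waiters)
{
	l->owner = (uintptr_t)t | (waiters ? HAS_WAITERS_SKETCH : 0);
}

static inline struct task *owner_task(const struct rt_mutex_sketch *l)
{
	return (struct task *)(l->owner & ~HAS_WAITERS_SKETCH);
}

static inline int owner_has_waiters(const struct rt_mutex_sketch *l)
{
	return l->owner & HAS_WAITERS_SKETCH;
}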
/*

View File

@ -111,10 +111,13 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
if (tg != &root_task_group)
return false;
/*
* We can only assume the task group can't go away on us if
* autogroup_move_group() can see us on ->thread_group list.
* If we race with autogroup_move_group() the caller can use the old
* value of signal->autogroup but in this case sched_move_task() will
* be called again before autogroup_kref_put().
*
* However, there is no way sched_autogroup_exit_task() could tell us
* to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
*/
if (p->flags & PF_EXITING)
return false;
@ -122,6 +125,16 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
return true;
}
void sched_autogroup_exit_task(struct task_struct *p)
{
/*
* We are going to call exit_notify() and autogroup_move_group() can't
* see this thread after that: we can no longer use signal->autogroup.
* See the PF_EXITING check in task_wants_autogroup().
*/
sched_move_task(p);
}
static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
@ -138,13 +151,20 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
}
p->signal->autogroup = autogroup_kref_get(ag);
if (!READ_ONCE(sysctl_sched_autogroup_enabled))
goto out;
/*
* We can't avoid sched_move_task() after we changed signal->autogroup,
* this process can already run with task_group() == prev->tg or we can
* race with cgroup code which can read autogroup = prev under rq->lock.
* In the latter case for_each_thread() can not miss a migrating thread,
* cpu_cgroup_attach() must not be possible after cgroup_exit() and it
* can't be removed from thread list, we hold ->siglock.
*
* If an exiting thread was already removed from thread list we rely on
* sched_autogroup_exit_task().
*/
for_each_thread(p, t)
sched_move_task(t);
out:
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}

View File

@ -980,23 +980,23 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
#ifndef CONFIG_PROVE_LOCKING
if (expected == FAILURE && debug_locks) {
expected_testcase_failures++;
printk("failed|");
pr_cont("failed|");
}
else
#endif
if (debug_locks != expected) {
unexpected_testcase_failures++;
printk("FAILED|");
pr_cont("FAILED|");
dump_stack();
} else {
testcase_successes++;
printk(" ok |");
pr_cont(" ok |");
}
testcase_total++;
if (debug_locks_verbose)
printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
lockclass_mask, debug_locks, expected);
/*
* Some tests (e.g. double-unlock) might corrupt the preemption
@ -1021,26 +1021,26 @@ static inline void print_testname(const char *testname)
#define DO_TESTCASE_1(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_1B(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_3(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_3RW(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_6(desc, name) \
print_testname(desc); \
@ -1050,7 +1050,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_6_SUCCESS(desc, name) \
print_testname(desc); \
@ -1060,7 +1060,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \
dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \
dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \
printk("\n");
pr_cont("\n");
/*
* 'read' variant: rlocks must not trigger.
@ -1073,7 +1073,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
printk("\n");
pr_cont("\n");
#define DO_TESTCASE_2I(desc, name, nr) \
DO_TESTCASE_1("hard-"desc, name##_hard, nr); \
@ -1726,25 +1726,25 @@ static void ww_tests(void)
dotest(ww_test_fail_acquire, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_normal, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_unneeded_slow, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("ww contexts mixing");
dotest(ww_test_two_contexts, FAILURE, LOCKTYPE_WW);
dotest(ww_test_diff_class, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("finishing ww context");
dotest(ww_test_context_done_twice, FAILURE, LOCKTYPE_WW);
dotest(ww_test_context_unlock_twice, FAILURE, LOCKTYPE_WW);
dotest(ww_test_context_fini_early, FAILURE, LOCKTYPE_WW);
dotest(ww_test_context_lock_after_done, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("locking mismatches");
dotest(ww_test_object_unlock_twice, FAILURE, LOCKTYPE_WW);
dotest(ww_test_object_lock_unbalanced, FAILURE, LOCKTYPE_WW);
dotest(ww_test_object_lock_stale_context, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("EDEADLK handling");
dotest(ww_test_edeadlk_normal, SUCCESS, LOCKTYPE_WW);
@ -1757,11 +1757,11 @@ static void ww_tests(void)
dotest(ww_test_edeadlk_acquire_more_edeadlk_slow, FAILURE, LOCKTYPE_WW);
dotest(ww_test_edeadlk_acquire_wrong, FAILURE, LOCKTYPE_WW);
dotest(ww_test_edeadlk_acquire_wrong_slow, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("spinlock nest unlocked");
dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
printk(" -----------------------------------------------------\n");
printk(" |block | try |context|\n");
@ -1771,25 +1771,25 @@ static void ww_tests(void)
dotest(ww_test_context_block, FAILURE, LOCKTYPE_WW);
dotest(ww_test_context_try, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_context_context, SUCCESS, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("try");
dotest(ww_test_try_block, FAILURE, LOCKTYPE_WW);
dotest(ww_test_try_try, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_try_context, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("block");
dotest(ww_test_block_block, FAILURE, LOCKTYPE_WW);
dotest(ww_test_block_try, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_block_context, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
print_testname("spinlock");
dotest(ww_test_spin_block, FAILURE, LOCKTYPE_WW);
dotest(ww_test_spin_try, SUCCESS, LOCKTYPE_WW);
dotest(ww_test_spin_context, FAILURE, LOCKTYPE_WW);
printk("\n");
pr_cont("\n");
}
void locking_selftest(void)
@ -1829,32 +1829,32 @@ void locking_selftest(void)
printk(" --------------------------------------------------------------------------\n");
print_testname("recursive read-lock");
printk(" |");
pr_cont(" |");
dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
printk(" |");
pr_cont(" |");
dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
pr_cont("\n");
print_testname("recursive read-lock #2");
printk(" |");
pr_cont(" |");
dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
printk(" |");
pr_cont(" |");
dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
pr_cont("\n");
print_testname("mixed read-write-lock");
printk(" |");
pr_cont(" |");
dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
pr_cont(" |");
dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
pr_cont("\n");
print_testname("mixed write-read-lock");
printk(" |");
pr_cont(" |");
dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
pr_cont(" |");
dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
pr_cont("\n");
printk(" --------------------------------------------------------------------------\n");