Revert "Merge branch 'pub/lts/caelli_ras' into 'pub/lts/0009-kabi' (merge request !671)"
This reverts commit 1eda0438d7.
parent e431a54055
commit fcad35499e
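For reference, a revert of a merge commit like this one is normally produced by naming the mainline parent explicitly; a minimal sketch, assuming the abbreviated hash resolves in the local tree:

    # keep the first parent (the branch that was merged into) as mainline
    git revert -m 1 1eda0438d7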
@@ -172,7 +172,7 @@ enum mce_notifier_prios {
MCE_PRIO_EDAC,
MCE_PRIO_NFIT,
MCE_PRIO_EXTLOG,
MCE_PRIO_UC,
MCE_PRIO_SRAO,
MCE_PRIO_EARLY,
MCE_PRIO_CEC
};

@@ -167,6 +167,8 @@ void mce_inject_log(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_inject_log);

static struct notifier_block mce_srao_nb;

void mce_register_decode_chain(struct notifier_block *nb)
{
if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))

@@ -615,30 +617,28 @@ static struct notifier_block early_nb = {
.priority = MCE_PRIO_EARLY,
};

static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
unsigned long pfn;

if (!mce || !mce_usable_address(mce))
return NOTIFY_DONE;

if (mce->severity != MCE_AO_SEVERITY &&
mce->severity != MCE_DEFERRED_SEVERITY)
if (!mce)
return NOTIFY_DONE;

if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn, whole_page(mce));
mce->kflags |= MCE_HANDLED_UC;
}
}

return NOTIFY_OK;
}
static struct notifier_block mce_uc_nb = {
.notifier_call = uc_decode_notifier,
.priority = MCE_PRIO_UC,
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier,
.priority = MCE_PRIO_SRAO,
};

static int mce_default_notifier(struct notifier_block *nb, unsigned long val,

@@ -1214,9 +1214,6 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
static void kill_me_now(struct callback_head *ch)
{
struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me);

p->mce_count = 0;
force_sig(SIGBUS);
}

@@ -1224,65 +1221,36 @@ static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED;
int ret;

p->mce_count = 0;
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
if (!p->mce_ripv)
flags |= MF_MUST_KILL;

ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
if (!ret) {
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
return;
}

/*
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
* to the current process with the proper error info, so no need to
* send SIGBUS here again.
*/
if (ret == -EHWPOISON)
return;

if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else {
pr_err("Memory error not recovered");
kill_me_now(cb);
}
}

static void kill_me_never(struct callback_head *cb)
static void queue_task_work(struct mce *m, int kill_it)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);

p->mce_count = 0;
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
}

static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
{
int count = ++current->mce_count;

/* First call, save all the details */
if (count == 1) {
current->mce_addr = m->addr;
current->mce_kflags = m->kflags;
current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(m);
current->mce_kill_me.func = func;
}

/* Ten is likely overkill. Don't expect more than two faults before task_work() */
if (count > 10)
mce_panic("Too many consecutive machine checks while accessing user data", m, msg);

/* Second or later call, make sure page address matches the one from first call */
if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
mce_panic("Consecutive machine checks to different user pages", m, msg);

/* Do not call task_work_add() more than once */
if (count > 1)
return;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
else
current->mce_kill_me.func = kill_me_maybe;

task_work_add(current, &current->mce_kill_me, true);
}

@@ -1433,10 +1401,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));

if (kill_it)
queue_task_work(&m, msg, kill_me_now);
else
queue_task_work(&m, msg, kill_me_maybe);
queue_task_work(&m, kill_it);

} else {
/*
* Handle an MCE which has happened in kernel space but from

@@ -1453,7 +1419,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}

if (m.kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&m, msg, kill_me_never);
queue_task_work(&m, kill_it);
}

out_ist:

@@ -2082,7 +2048,7 @@ int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&early_nb);
mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_srao_nb);
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();

@@ -224,7 +224,6 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Don't try to copy the tail if machine check happened
*
* Input:
* eax trap number written by ex_handler_copy()
* rdi destination
* rsi source
* rdx count

@@ -234,17 +233,22 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
ALIGN;
.Lcopy_user_handle_tail:
cmp $18,%eax
je 3f

movl %edx,%ecx
cmp $18,%eax /* check if X86_TRAP_MC */
je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret

3:
movl %edx,%eax
/*
* Return zero to pretend that this copy succeeded. This
* is counter-intuitive, but needed to prevent the code
* in lib/iov_iter.c from retrying and running back into
* the poison cache line again. The machine check handler
* will ensure that a SIGBUS is sent to the task.
*/
3: xorl %eax,%eax
ASM_CLAC
ret

@@ -30,128 +30,14 @@
readl((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4)
#define I10NM_GET_MCMTR(m, i) \
readl((m)->mbase + 0x20ef8 + (i) * 0x4000)
#define I10NM_GET_REG32(m, i, offset) \
readl((m)->mbase + (i) * 0x4000 + (offset))
#define I10NM_GET_REG64(m, i, offset) \
readq((m)->mbase + (i) * 0x4000 + (offset))
#define I10NM_SET_REG32(m, i, offset, v) \
writel(v, (m)->mbase + (i) * 0x4000 + (offset))

#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
GET_BITFIELD(reg, 0, 10) + 1) << 12)

#define RETRY_RD_ERR_LOG_UC BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
#define RETRY_RD_ERR_LOG_EN BIT(15)
#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))

static struct list_head *i10nm_edac_list;

static struct res_config *res_cfg;
static int retry_rd_err_log;

static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};

static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
{
u32 s, d;

if (!imc->mbase)
return;

s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);

if (enable) {
/* Save default configurations */
imc->chan[chan].retry_rd_err_log_s = s;
imc->chan[chan].retry_rd_err_log_d = d;

s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
s |= RETRY_RD_ERR_LOG_EN;
d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
d |= RETRY_RD_ERR_LOG_EN;
} else {
/* Restore default configurations */
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
s |= RETRY_RD_ERR_LOG_UC;
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
s |= RETRY_RD_ERR_LOG_NOOVER;
if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
s &= ~RETRY_RD_ERR_LOG_EN;
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
d |= RETRY_RD_ERR_LOG_UC;
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
d |= RETRY_RD_ERR_LOG_NOOVER;
if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
d &= ~RETRY_RD_ERR_LOG_EN;
}

I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
}

static void enable_retry_rd_err_log(bool enable)
{
struct skx_dev *d;
int i, j;

edac_dbg(2, "\n");

list_for_each_entry(d, i10nm_edac_list, list)
for (i = 0; i < I10NM_NUM_IMC; i++)
for (j = 0; j < I10NM_NUM_CHANNELS; j++)
__enable_retry_rd_err_log(&d->imc[i], j, enable);
}

static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
int len, bool scrub_err)
{
struct skx_imc *imc = &res->dev->imc[res->imc];
u32 log0, log1, log2, log3, log4;
u32 corr0, corr1, corr2, corr3;
u64 log5;
u32 *offsets;
int n;

if (!imc->mbase)
return;

offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;

log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
log0, log1, log2, log3, log4, log5);

corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);

if (len - n > 0)
snprintf(msg + n, len - n,
" correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
corr0 & 0xffff, corr0 >> 16,
corr1 & 0xffff, corr1 >> 16,
corr2 & 0xffff, corr2 >> 16,
corr3 & 0xffff, corr3 >> 16);

/* Clear status bits */
if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
}
}

static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
unsigned int dev, unsigned int fun)
{

@@ -243,16 +129,12 @@ static struct res_config i10nm_cfg0 = {
.type = I10NM,
.decs_did = 0x3452,
.busno_cfg_offset = 0xcc,
.offsets_scrub = offsets_scrub_icx,
.offsets_demand = offsets_demand_icx,
};

static struct res_config i10nm_cfg1 = {
.type = I10NM,
.decs_did = 0x3452,
.busno_cfg_offset = 0xd0,
.offsets_scrub = offsets_scrub_icx,
.offsets_demand = offsets_demand_icx,
};

static const struct x86_cpu_id i10nm_cpuids[] = {

@@ -385,7 +267,6 @@ static int __init i10nm_init(void)
return -ENODEV;

cfg = (struct res_config *)id->driver_data;
res_cfg = cfg;

/* Newer steppings have different offset for ATOM_TREMONT_D/ICELAKE_X */
if (boot_cpu_data.x86_stepping >= 4)

@@ -442,12 +323,6 @@ static int __init i10nm_init(void)
mce_register_decode_chain(&i10nm_mce_dec);
setup_i10nm_debug();

if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
skx_set_decode(NULL, show_retry_rd_err_log);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(true);
}

i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);

return 0;

@@ -459,13 +334,6 @@ fail:
static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");

if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
skx_set_decode(NULL, NULL);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(false);
}

teardown_i10nm_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();

@@ -475,8 +343,5 @@ static void __exit i10nm_exit(void)
module_init(i10nm_init);
module_exit(i10nm_exit);

module_param(retry_rd_err_log, int, 0444);
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");

@@ -231,8 +231,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
#define SKX_ILV_TARGET(tgt) ((tgt) & 7)

static void skx_show_retry_rd_err_log(struct decoded_addr *res,
char *msg, int len,
bool scrub_err)
char *msg, int len)
{
u32 log0, log1, log2, log3, log4;
u32 corr0, corr1, corr2, corr3;

@@ -481,7 +481,6 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
bool scrub_err = false;
bool recoverable;
int len;
u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);

@@ -533,7 +532,6 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break;
case 4:
optype = "memory scrubbing error";
scrub_err = true;
break;
default:
optype = "reserved";

@@ -556,7 +554,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
}

if (skx_show_retry_rd_err_log)
skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);
skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);

edac_dbg(0, "%s\n", skx_msg);

@@ -65,8 +65,6 @@ struct skx_dev {
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
u32 retry_rd_err_log_s;
u32 retry_rd_err_log_d;
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;

@@ -120,14 +118,11 @@ struct res_config {
unsigned int decs_did;
/* Default bus number configuration register offset */
int busno_cfg_offset;
/* Offsets of retry_rd_err_log registers */
u32 *offsets_scrub;
u32 *offsets_demand;
};

typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);

int __init skx_adxl_get(void);
void __exit skx_adxl_put(void);

@@ -762,6 +762,10 @@ again:
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
*
* Not only is this an optimisation, but it is also required
* to check that the address is actually valid, when atomic
* usercopies are used, below.
*/
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;

@@ -784,22 +788,24 @@ again:
iomap);
if (unlikely(status < 0))
break;
copied = status;

cond_resched();

if (unlikely(status == 0)) {
iov_iter_advance(i, copied);
if (unlikely(copied == 0)) {
/*
* A short copy made iomap_write_end() reject the
* thing entirely. Might be memory poisoning
* halfway through, might be a race with munmap,
* might be severe memory pressure.
* If we were unable to copy any data at all, we must
* fall back to a single segment length write.
*
* If we didn't fallback here, we could livelock
* because not all segments in the iov can be copied at
* once without a pagefault.
*/
if (copied)
bytes = copied;
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_single_seg_count(i));
goto again;
}
copied = status;
iov_iter_advance(i, copied);
pos += copied;
written += copied;
length -= copied;

@@ -1292,7 +1292,6 @@ struct task_struct {
__mce_reserved : 62;

struct callback_head mce_kill_me;
int mce_count;
#endif

/*

@@ -321,11 +321,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}

static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
{
return swp_offset(entry);
}

static inline void num_poisoned_pages_inc(void)
{
atomic_long_inc(&num_poisoned_pages);

@@ -6,11 +6,9 @@
#ifdef CONFIG_PRINTK

#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff
#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000
#define PRINTK_NMI_CONTEXT_MASK 0xff0000000

#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
#define PRINTK_NMI_CONTEXT_MASK 0x80000000

extern raw_spinlock_t logbuf_lock;

@@ -303,12 +303,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
void notrace printk_nmi_enter(void)
{
this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}

void notrace printk_nmi_exit(void)
{
this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
}

/*

mm/filemap.c
@@ -3533,6 +3533,10 @@ again:
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
*
* Not only is this an optimisation, but it is also required
* to check that the address is actually valid, when atomic
* usercopies are used, below.
*/
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;

@@ -3559,22 +3563,24 @@ again:
page, fsdata);
if (unlikely(status < 0))
break;
copied = status;

cond_resched();

if (unlikely(status == 0)) {
iov_iter_advance(i, copied);
if (unlikely(copied == 0)) {
/*
* A short copy made ->write_end() reject the
* thing entirely. Might be memory poisoning
* halfway through, might be a race with munmap,
* might be severe memory pressure.
* If we were unable to copy any data at all, we must
* fall back to a single segment length write.
*
* If we didn't fallback here, we could livelock
* because not all segments in the iov can be copied at
* once without a pagefault.
*/
if (copied)
bytes = copied;
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_single_seg_count(i));
goto again;
}
copied = status;
iov_iter_advance(i, copied);
pos += copied;
written += copied;

@@ -56,7 +56,6 @@
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include "internal.h"
#include "ras/ras_event.h"

@@ -528,150 +527,6 @@ static void collect_procs(struct page *page, struct list_head *tokill,
kfree(tk);
}

struct hwp_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
};

static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
{
tk->addr = addr;
tk->size_shift = shift;
}

static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
unsigned long poisoned_pfn, struct to_kill *tk)
{
unsigned long pfn = 0;

if (pte_present(pte)) {
pfn = pte_pfn(pte);
} else {
swp_entry_t swp = pte_to_swp_entry(pte);

if (is_hwpoison_entry(swp))
pfn = hwpoison_entry_to_pfn(swp);
}

if (!pfn || pfn != poisoned_pfn)
return 0;

set_to_kill(tk, addr, shift);
return 1;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
unsigned long hwpoison_vaddr;

if (!pmd_present(pmd))
return 0;
pfn = pmd_pfn(pmd);
if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
return 1;
}
return 0;
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
return 0;
}
#endif

static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
int ret = 0;
pte_t *ptep;
spinlock_t *ptl;

ptl = pmd_trans_huge_lock(pmdp, walk->vma);
if (ptl) {
ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
spin_unlock(ptl);
goto out;
}

if (pmd_trans_unstable(pmdp))
goto out;

ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; ptep++, addr += PAGE_SIZE) {
ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
hwp->pfn, &hwp->tk);
if (ret == 1)
break;
}
pte_unmap_unlock(ptep - 1, ptl);
out:
cond_resched();
return ret;
}

#ifdef CONFIG_HUGETLB_PAGE
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);

return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
hwp->pfn, &hwp->tk);
}
#else
#define hwpoison_hugetlb_range NULL
#endif

static struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
};

/*
* Sends SIGBUS to the current process with error info.
*
* This function is intended to handle "Action Required" MCEs on already
* hardware poisoned pages. They could happen, for example, when
* memory_failure() failed to unmap the error page at the first call, or
* when multiple local machine checks happened on different CPUs.
*
* MCE handler currently has no easy access to the error virtual address,
* so this function walks page table to find it. The returned virtual address
* is proper in most cases, but it could be wrong when the application
* process has multiple entries mapping the error page.
*/
static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
int flags)
{
int ret;
struct hwp_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;

down_read(&(p->mm->mmap_sem));
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
else
ret = 0;
up_read(&(p->mm->mmap_sem));
return ret > 0 ? -EHWPOISON : -EFAULT;
}

static const char *action_name[] = {
[MF_IGNORED] = "Ignored",
[MF_FAILED] = "Failed",

@@ -775,7 +630,6 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
*/
static int me_kernel(struct page *p, unsigned long pfn)
{
unlock_page(p);
return MF_IGNORED;
}

@@ -785,7 +639,6 @@ static int me_kernel(struct page *p, unsigned long pfn)
static int me_unknown(struct page *p, unsigned long pfn)
{
pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
unlock_page(p);
return MF_FAILED;
}

@@ -794,7 +647,6 @@ static int me_unknown(struct page *p, unsigned long pfn)
*/
static int me_pagecache_clean(struct page *p, unsigned long pfn)
{
int ret;
struct address_space *mapping;

delete_from_lru_cache(p);

@@ -803,10 +655,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
* For anonymous pages we're done the only reference left
* should be the one m_f() holds.
*/
if (PageAnon(p)) {
ret = MF_RECOVERED;
goto out;
}
if (PageAnon(p))
return MF_RECOVERED;

/*
* Now truncate the page in the page cache. This is really

@@ -820,8 +670,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
/*
* Page has been teared down in the meanwhile
*/
ret = MF_FAILED;
goto out;
return MF_FAILED;
}

/*

@@ -829,10 +678,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
*
* Open: to take i_mutex or not for this? Right now we don't.
*/
ret = truncate_error_page(p, pfn, mapping);
out:
unlock_page(p);
return ret;
return truncate_error_page(p, pfn, mapping);
}

/*

@@ -908,26 +754,24 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
*/
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
int ret;

ClearPageDirty(p);
/* Trigger EIO in shmem: */
ClearPageUptodate(p);

ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
unlock_page(p);
return ret;
if (!delete_from_lru_cache(p))
return MF_DELAYED;
else
return MF_FAILED;
}

static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
int ret;

delete_from_swap_cache(p);

ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
unlock_page(p);
return ret;
if (!delete_from_lru_cache(p))
return MF_RECOVERED;
else
return MF_FAILED;
}

/*

@@ -948,7 +792,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, pfn, mapping);
unlock_page(hpage);
} else {
unlock_page(hpage);
/*

@@ -960,6 +803,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
put_page(hpage);
dissolve_free_huge_page(p);
res = MF_RECOVERED;
lock_page(hpage);
}

return res;

@@ -992,8 +836,6 @@ static struct page_state {
unsigned long mask;
unsigned long res;
enum mf_action_page_type type;

/* Callback ->action() has to unlock the relevant page inside it. */
int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
{ reserved, reserved, MF_MSG_KERNEL, me_kernel },

@@ -1058,7 +900,6 @@ static int page_action(struct page_state *ps, struct page *p,
int result;
int count;

/* page p should be unlocked after returning from ps->action(). */
result = ps->action(p, pfn);

count = page_count(p) - 1;

@@ -1250,10 +1091,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestSetPageHWPoison(head)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, page_to_pfn(head), flags);
return res;
return 0;
}

num_poisoned_pages_inc();

@@ -1309,7 +1147,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
goto out;
}

return identify_page_state(pfn, p, page_flags);
res = identify_page_state(pfn, p, page_flags);
out:
unlock_page(head);
return res;

@@ -1413,9 +1251,8 @@ int memory_failure(unsigned long pfn, int flags)
struct page *hpage;
struct page *orig_head;
struct dev_pagemap *pgmap;
int res = 0;
int res;
unsigned long page_flags;
static DEFINE_MUTEX(mf_mutex);

if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);

@@ -1433,20 +1270,12 @@ int memory_failure(unsigned long pfn, int flags)
return -ENXIO;
}

mutex_lock(&mf_mutex);

if (PageHuge(p)) {
res = memory_failure_hugetlb(pfn, flags);
goto unlock_mutex;
}

if (PageHuge(p))
return memory_failure_hugetlb(pfn, flags);
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
goto unlock_mutex;
return 0;
}

orig_head = hpage = compound_head(p);

@@ -1466,12 +1295,11 @@ int memory_failure(unsigned long pfn, int flags)
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
if (is_free_buddy_page(p)) {
action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
res = 0;
return 0;
} else {
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
res = -EBUSY;
return -EBUSY;
}
goto unlock_mutex;
}

if (PageTransHuge(hpage)) {

@@ -1487,8 +1315,7 @@ int memory_failure(unsigned long pfn, int flags)
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
put_hwpoison_page(p);
res = -EBUSY;
goto unlock_mutex;
return -EBUSY;
}
unlock_page(p);
VM_BUG_ON_PAGE(!page_count(p), p);

@@ -1510,8 +1337,7 @@ int memory_failure(unsigned long pfn, int flags)
action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
else
action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
res = 0;
goto unlock_mutex;
return 0;
}

lock_page(p);

@@ -1523,7 +1349,7 @@ int memory_failure(unsigned long pfn, int flags)
if (PageCompound(p) && compound_head(p) != orig_head) {
action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
res = -EBUSY;
goto unlock_page;
goto out;
}

/*

@@ -1546,14 +1372,14 @@ int memory_failure(unsigned long pfn, int flags)
num_poisoned_pages_dec();
unlock_page(p);
put_hwpoison_page(p);
goto unlock_mutex;
return 0;
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unlock_page(p);
put_hwpoison_page(p);
goto unlock_mutex;
return 0;
}

if (!PageTransTail(p) && !PageLRU(p))

@@ -1575,7 +1401,7 @@ int memory_failure(unsigned long pfn, int flags)
if (!hwpoison_user_mappings(p, pfn, flags, &hpage)) {
action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
res = -EBUSY;
goto unlock_page;
goto out;
}

/*

@@ -1584,17 +1410,13 @@ int memory_failure(unsigned long pfn, int flags)
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
res = -EBUSY;
goto unlock_page;
goto out;
}

identify_page_state:
res = identify_page_state(pfn, p, page_flags);
mutex_unlock(&mf_mutex);
return res;
unlock_page:
out:
unlock_page(p);
unlock_mutex:
mutex_unlock(&mf_mutex);
return res;
}
EXPORT_SYMBOL_GPL(memory_failure);