powerpc fixes for 5.9 #3

Add perf support for emitting extended registers for power10.
 
 A fix for CPU hotplug on pseries, where on large/loaded systems we may not wait
 long enough for the CPU to be offlined, leading to crashes.
 
 Addition of a raw cputable entry for Power10, which is not required to boot, but
 is required to make our PMU setup work correctly in guests.
 
 Three fixes for the recent changes on 32-bit Book3S to move modules into their
 own segment for strict RWX.
 
 A fix for a recent change in our powernv PCI code that could lead to crashes.
 
 A change to our perf interrupt accounting to avoid soft lockups when using some
 events, found by syzkaller.
 
 A change in the way we handle power loss events from the hypervisor on pseries.
 We no longer immediately shut down if we're told we're running on a UPS.
 
 A few other minor fixes.
 
 Thanks to:
   Alexey Kardashevskiy, Andreas Schwab, Aneesh Kumar K.V, Anju T Sudhakar,
   Athira Rajeev, Christophe Leroy, Frederic Barrat, Greg Kurz, Kajol Jain,
   Madhavan Srinivasan, Michael Neuling, Michael Roth, Nageswara R Sastry, Oliver
   O'Halloran, Thiago Jung Bauermann, Vaidyanathan Srinivasan, Vasant Hegde.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl9CYMwTHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgC/wEACljEVnfHzUObmIgqn9Ru3JlfEI6Hlk
 ts7kajCgS/I/bV6DoDMZ8rlZX87QFOwiBkNM1I+vGHSLAuzsmFAnbFPyxw/idxpQ
 XUoNy8OCvbbzCPzChYdiU0PxW2h2i+QxkmktlWSN1SAPudJUWvoPS2Y4+sC4zksk
 B4B6tbW2DT8TFO1kKeZsU9r2t+EH5KwlIOi+uxbH8d76lJINKkBNSnjzMytl7drM
 TZx/HWr8+s/WJo1787x6bv8gxs5tV9b4vIKt2YZNTY2kvYsEDE+fBR1XfCAneXMw
 ASYnZV+/xCLIUpRF6DI4RAShLBT/Sfiy1yMTndZgfqAgquokFosszNx2zrk0IzCd
 AgqX93YGbGz/H72W3Y/B0W9+74XyO/u2D9zhNpkCRMpdcsM5MbvOQrQA5Ustu47E
 av5MOaF/nNCd8J+OC4Qjgt5VFb/s0h4FdtrwT80srOa2U6Of9cD/T6xAfOszSJ96
 cWdSb5qhn5wuD9pP32KjwdmWBiUw38/gnRGKpRlOVzyHL/GKZijyaBbWBlkoEmty
 0nbjWW/IVfsOb5Weuiybg541h/QOVuOkb2pOvPClITiH83MY/AciDJ+auo4M//hW
 haKz9IgV/KctmzDE+v9d0BD8sGmW03YUcQAPdRufI0eGXijDLcnHeuk2B3Nu84Pq
 8mtev+VQ+T6cZA==
 =sdJ1
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.9-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - Add perf support for emitting extended registers for power10.

 - A fix for CPU hotplug on pseries, where on large/loaded systems we
   may not wait long enough for the CPU to be offlined, leading to
   crashes.

 - Addition of a raw cputable entry for Power10, which is not required
   to boot, but is required to make our PMU setup work correctly in
   guests.

 - Three fixes for the recent changes on 32-bit Book3S to move modules
   into their own segment for strict RWX.

 - A fix for a recent change in our powernv PCI code that could lead to
   crashes.

 - A change to our perf interrupt accounting to avoid soft lockups when
   using some events, found by syzkaller.

 - A change in the way we handle power loss events from the hypervisor
   on pseries. We no longer immediately shut down if we're told we're
   running on a UPS.

 - A few other minor fixes.

Thanks to Alexey Kardashevskiy, Andreas Schwab, Aneesh Kumar K.V, Anju T
Sudhakar, Athira Rajeev, Christophe Leroy, Frederic Barrat, Greg Kurz,
Kajol Jain, Madhavan Srinivasan, Michael Neuling, Michael Roth,
Nageswara R Sastry, Oliver O'Halloran, Thiago Jung Bauermann,
Vaidyanathan Srinivasan, Vasant Hegde.

* tag 'powerpc-5.9-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/perf/hv-24x7: Move cpumask file to top folder of hv-24x7 driver
  powerpc/32s: Fix module loading failure when VMALLOC_END is over 0xf0000000
  powerpc/pseries: Do not initiate shutdown when system is running on UPS
  powerpc/perf: Fix soft lockups due to missed interrupt accounting
  powerpc/powernv/pci: Fix possible crash when releasing DMA resources
  powerpc/pseries/hotplug-cpu: wait indefinitely for vCPU death
  powerpc/32s: Fix is_module_segment() when MODULES_VADDR is defined
  powerpc/kasan: Fix KASAN_SHADOW_START on BOOK3S_32
  powerpc/fixmap: Fix the size of the early debug area
  powerpc/pkeys: Fix build error with PPC_MEM_KEYS disabled
  powerpc/kernel: Cleanup machine check function declarations
  powerpc: Add POWER10 raw mode cputable entry
  powerpc/perf: Add extended regs support for power10 platform
  powerpc/perf: Add support for outputting extended regs in perf intr_regs
  powerpc: Fix P10 PVR revision in /proc/cpuinfo for SMT4 cores
This commit is contained in:
Linus Torvalds 2020-08-23 11:37:23 -07:00
commit cb95712138
21 changed files with 161 additions and 25 deletions

View File

@ -43,7 +43,7 @@ Description: read only
This sysfs interface exposes the number of cores per chip
present in the system.
What: /sys/devices/hv_24x7/interface/cpumask
What: /sys/devices/hv_24x7/cpumask
Date: July 2020
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
Description: read only

View File

@ -9,6 +9,11 @@
#ifndef __ASSEMBLY__
/*
* Added to include __machine_check_early_realmode_* functions
*/
#include <asm/mce.h>
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
*/

View File

@ -52,7 +52,7 @@ enum fixed_addresses {
FIX_HOLE,
/* reserve the top 128K for early debugging purposes */
FIX_EARLY_DEBUG_TOP = FIX_HOLE,
FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1,
FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,

View File

@ -15,11 +15,18 @@
#ifndef __ASSEMBLY__
#include <asm/page.h>
#include <linux/sizes.h>
#define KASAN_SHADOW_SCALE_SHIFT 3
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX)
#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
#else
#define KASAN_KERN_START PAGE_OFFSET
#endif
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
(PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
(KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)

View File

@ -210,6 +210,9 @@ struct mce_error_info {
#define MCE_EVENT_RELEASE true
#define MCE_EVENT_DONTRELEASE false
struct pt_regs;
struct notifier_block;
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
uint64_t addr, uint64_t phys_addr);
@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb);
int mce_unregister_notifier(struct notifier_block *nb);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
long __machine_check_early_realmode_p7(struct pt_regs *regs);
long __machine_check_early_realmode_p8(struct pt_regs *regs);
long __machine_check_early_realmode_p9(struct pt_regs *regs);
long __machine_check_early_realmode_p10(struct pt_regs *regs);
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* __ASM_PPC64_MCE_H__ */

View File

@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; }
/* To support perf_regs sier update */
extern bool is_sier_available(void);
/* To define perf extended regs mask value */
extern u64 PERF_REG_EXTENDED_MASK;
#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
#endif

View File

@ -62,6 +62,11 @@ struct power_pmu {
int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
/*
* set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
* the pmu supports extended perf regs capability
*/
int capabilities;
};
/*

View File

@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
/* Extended registers */
PERF_REG_POWERPC_MMCR0,
PERF_REG_POWERPC_MMCR1,
PERF_REG_POWERPC_MMCR2,
PERF_REG_POWERPC_MMCR3,
PERF_REG_POWERPC_SIER2,
PERF_REG_POWERPC_SIER3,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */

View File

@ -72,9 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power10(void);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@ -542,6 +539,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Power10 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00800000,
.cpu_name = "POWER10 (raw)",
.cpu_features = CPU_FTRS_POWER10,
.cpu_user_features = COMMON_USER_POWER10,
.cpu_user_features2 = COMMON_USER2_POWER10,
.mmu_features = MMU_FTRS_POWER10,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power10",
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power10,
.cpu_restore = __restore_cpu_power10,
.machine_check_early = __machine_check_early_realmode_p10,
.platform = "power10",
},
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,

View File

@ -64,10 +64,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
extern long __machine_check_early_realmode_p10(struct pt_regs *regs);
static int hv_mode;
static struct {

View File

@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
min = pvr & 0xFF;
break;
case 0x004e: /* POWER9 bits 12-15 give chip type */
case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
maj = (pvr >> 8) & 0x0F;
min = pvr & 0xFF;
break;

View File

@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr)
{
if (!IS_ENABLED(CONFIG_MODULES))
return false;
#ifdef MODULES_VADDR
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
return false;
#else
if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
return false;
if (addr >= ALIGN(VMALLOC_END, SZ_256M))
if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
return false;
#endif
return true;
}

View File

@ -1115,8 +1115,10 @@ void hash__early_init_mmu_secondary(void)
&& cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY))
#ifdef CONFIG_PPC_MEM_KEYS
if (mmu_has_feature(MMU_FTR_PKEY))
mtspr(SPRN_UAMOR, default_uamor);
#endif
}
#endif /* CONFIG_SMP */

View File

@ -2141,6 +2141,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
} else if (period) {
/* Account for interrupt in case of invalid SIAR */
if (perf_event_account_interrupt(event))
power_pmu_stop(event, 0);
}
}
@ -2323,6 +2327,7 @@ int register_power_pmu(struct power_pmu *pmu)
pmu->name);
power_pmu.attr_groups = ppmu->attr_groups;
power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
#ifdef MSR_HV
/*

View File

@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = {
NULL,
};
static struct attribute *cpumask_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};
static struct attribute_group cpumask_attr_group = {
.attrs = cpumask_attrs,
};
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = {
&dev_attr_sockets.attr,
&dev_attr_chipspersocket.attr,
&dev_attr_coresperchip.attr,
&dev_attr_cpumask.attr,
NULL,
};
@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = {
&event_desc_group,
&event_long_desc_group,
&if_group,
&cpumask_attr_group,
NULL,
};

View File

@ -13,9 +13,11 @@
#include <asm/ptrace.h>
#include <asm/perf_regs.h>
u64 PERF_REG_EXTENDED_MASK;
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
@ -69,10 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
/* Function to return the extended register values */
static u64 get_ext_regs_value(int idx)
{
switch (idx) {
case PERF_REG_POWERPC_MMCR0:
return mfspr(SPRN_MMCR0);
case PERF_REG_POWERPC_MMCR1:
return mfspr(SPRN_MMCR1);
case PERF_REG_POWERPC_MMCR2:
return mfspr(SPRN_MMCR2);
#ifdef CONFIG_PPC64
case PERF_REG_POWERPC_MMCR3:
return mfspr(SPRN_MMCR3);
case PERF_REG_POWERPC_SIER2:
return mfspr(SPRN_SIER2);
case PERF_REG_POWERPC_SIER3:
return mfspr(SPRN_SIER3);
#endif
default: return 0;
}
}
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX;
if (cpu_has_feature(CPU_FTR_ARCH_31))
perf_reg_extended_max = PERF_REG_MAX_ISA_31;
else if (cpu_has_feature(CPU_FTR_ARCH_300))
perf_reg_extended_max = PERF_REG_MAX_ISA_300;
if (idx == PERF_REG_POWERPC_SIER &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
@ -85,6 +113,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
IS_ENABLED(CONFIG_PPC32)))
return 0;
if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max)
return get_ext_regs_value(idx);
/*
* If the idx is referring to value beyond the
* supported registers, return 0 with a warning
*/
if (WARN_ON_ONCE(idx >= perf_reg_extended_max))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}

View File

@ -87,6 +87,8 @@
#define POWER10_MMCRA_IFM3 0x00000000C0000000UL
#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
extern u64 PERF_REG_EXTENDED_MASK;
/* Table of alternatives, sorted by column 0 */
static const unsigned int power10_event_alternatives[][MAX_ALT] = {
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = {
.cache_events = &power10_cache_events,
.attr_groups = power10_pmu_attr_groups,
.bhrb_nr = 32,
.capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power10_pmu(void)
@ -408,6 +411,9 @@ int init_power10_pmu(void)
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
return -ENODEV;
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
rc = register_power_pmu(&power10_pmu);
if (rc)
return rc;

View File

@ -90,6 +90,8 @@ enum {
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
extern u64 PERF_REG_EXTENDED_MASK;
/* Nasty Power9 specific hack */
#define PVR_POWER9_CUMULUS 0x00002000
@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = {
.cache_events = &power9_cache_events,
.attr_groups = power9_pmu_attr_groups,
.bhrb_nr = 32,
.capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power9_pmu(void)
@ -457,6 +460,9 @@ int init_power9_pmu(void)
}
}
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
rc = register_power_pmu(&power9_pmu);
if (rc)
return rc;

View File

@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
struct iommu_table *tbl = pe->table_group.tables[0];
int64_t rc;
if (pe->dma_setup_done)
if (!pe->dma_setup_done)
return;
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);

View File

@ -107,22 +107,28 @@ static int pseries_cpu_disable(void)
*/
static void pseries_cpu_die(unsigned int cpu)
{
int tries;
int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
unsigned long timeout = jiffies + msecs_to_jiffies(120000);
for (tries = 0; tries < 25; tries++) {
while (true) {
cpu_status = smp_query_cpu_stopped(pcpu);
if (cpu_status == QCSS_STOPPED ||
cpu_status == QCSS_HARDWARE_ERROR)
break;
cpu_relax();
if (time_after(jiffies, timeout)) {
pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
cpu, pcpu);
timeout = jiffies + msecs_to_jiffies(120000);
}
cond_resched();
}
if (cpu_status != 0) {
printk("Querying DEAD? cpu %i (%i) shows %i\n",
cpu, pcpu, cpu_status);
if (cpu_status == QCSS_HARDWARE_ERROR) {
pr_warn("CPU %i (hwid %i) reported error while dying\n",
cpu, pcpu);
}
/* Isolation and deallocation are definitely done by

View File

@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier)
case EPOW_SHUTDOWN_ON_UPS:
pr_emerg("Loss of system power detected. System is running on"
" UPS/battery. Check RTAS error log for details\n");
orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: