diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 935adcd92a81..cc8cd656ccfe 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -212,7 +212,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end); +int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 365010f66570..705193e7192f 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -313,7 +313,7 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end); +int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 7765a800ddae..b7d066b037da 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -436,15 +436,15 @@ struct openpic; extern void kvm_cma_reserve(void) __init; static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { - paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr; + paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr; } static inline void kvmppc_set_xive_tima(int cpu, unsigned long phys_addr, void __iomem *virt_addr) { - paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr; - 
paca[cpu].kvm_hstate.xive_tima_virt = virt_addr; + paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr; + paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr; } static inline u32 kvmppc_get_xics_latch(void) @@ -458,7 +458,7 @@ static inline u32 kvmppc_get_xics_latch(void) static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) { - paca[cpu].kvm_hstate.host_ipi = host_ipi; + paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi; } static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index d0a2a2f99564..65d589689f01 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -36,14 +36,16 @@ #include /* - * We only have to have statically allocated lppaca structs on - * legacy iSeries, which supports at most 64 cpus. - */ -#define NR_LPPACAS 1 - -/* - * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k - * alignment is sufficient to prevent this + * The lppaca is the "virtual processor area" registered with the hypervisor, + * H_REGISTER_VPA etc. + * + * According to PAPR, the structure is 640 bytes long, must be L1 cache line + * aligned, and must not cross a 4kB boundary. Its size field must be at + * least 640 bytes (but may be more). + * + * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than + * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep + * this structure as the canonical 640 byte size. 
*/ struct lppaca { /* cacheline 1 contains read-only data */ @@ -97,13 +99,11 @@ struct lppaca { __be32 page_ins; /* CMO Hint - # page ins by OS */ u8 reserved11[148]; - volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ + volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ u8 reserved12[96]; -} __attribute__((__aligned__(0x400))); +} ____cacheline_aligned; -extern struct lppaca lppaca[]; - -#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) +#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr) /* * We are using a non architected field to determine if a partition is diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c97b41185ab7..4185f1c96125 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -47,7 +47,10 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_paca() local_paca #endif +#ifdef CONFIG_PPC_PSERIES #define get_lppaca() (get_paca()->lppaca_ptr) +#endif + #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; @@ -59,7 +62,7 @@ struct task_struct; * processor. 
*/ struct paca_struct { -#ifdef CONFIG_PPC_BOOK3S +#ifdef CONFIG_PPC_PSERIES /* * Because hw_cpu_id, unlike other paca fields, is accessed * routinely from other CPUs (from the IRQ code), we stick to @@ -68,7 +71,8 @@ struct paca_struct { */ struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */ -#endif /* CONFIG_PPC_BOOK3S */ +#endif /* CONFIG_PPC_PSERIES */ + /* * MAGIC: the spinlock functions in arch/powerpc/lib/locks.c * load lock_token and paca_index with a single lwz @@ -161,10 +165,14 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 irq_soft_mask; /* mask for irq soft masking */ + u8 soft_enabled; /* irq soft-enable flag */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + u8 pmcregs_in_use; /* pseries puts this in lppaca */ +#endif u64 sprg_vdso; /* Saved user-visible sprg */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM u64 tm_scratch; /* TM scratch area for reclaim */ @@ -244,18 +252,20 @@ struct paca_struct { void *rfi_flush_fallback_area; u64 l1d_flush_size; #endif -}; +} ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); -extern struct paca_struct *paca; +extern struct paca_struct **paca_ptrs; extern void initialise_paca(struct paca_struct *new_paca, int cpu); extern void setup_paca(struct paca_struct *new_paca); -extern void allocate_pacas(void); +extern void allocate_paca_ptrs(void); +extern void allocate_paca(int cpu); extern void free_unused_pacas(void); #else /* CONFIG_PPC64 */ -static inline void allocate_pacas(void) { }; +static inline void allocate_paca_ptrs(void) { }; +static inline void allocate_paca(int cpu) { }; static inline void free_unused_pacas(void) { }; #endif /* CONFIG_PPC64 */ diff --git 
a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index 5a9ede4962cb..7ac3586c38ab 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -31,10 +31,21 @@ void ppc_enable_pmcs(void); #ifdef CONFIG_PPC_BOOK3S_64 #include +#include static inline void ppc_set_pmu_inuse(int inuse) { - get_lppaca()->pmcregs_in_use = inuse; +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) + if (firmware_has_feature(FW_FEATURE_LPAR)) { +#ifdef CONFIG_PPC_PSERIES + get_lppaca()->pmcregs_in_use = inuse; +#endif + } else { +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + get_paca()->pmcregs_in_use = inuse; +#endif + } +#endif } extern void power4_enable_pmcs(void); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index bbcdf929be54..27fa52ed6d00 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -23,6 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); +void mem_topology_setup(void); void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index fac963e10d39..cfecfee1194b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern int boot_cpuid; extern int spinning_secondaries; +extern u32 *cpu_to_phys_id; extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); @@ -170,12 +171,12 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu) #ifdef CONFIG_PPC64 static inline int get_hard_smp_processor_id(int cpu) { - return paca[cpu].hw_cpu_id; + return paca_ptrs[cpu]->hw_cpu_id; } static inline void set_hard_smp_processor_id(int cpu, int phys) { - paca[cpu].hw_cpu_id = phys; + paca_ptrs[cpu]->hw_cpu_id = phys; } #else /* 32-bit */ diff --git a/arch/powerpc/include/asm/sparsemem.h 
b/arch/powerpc/include/asm/sparsemem.h index a7916ee6dfb6..bc66712bdc3c 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,7 +17,7 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end); +extern int create_section_mapping(unsigned long start, unsigned long end, int nid); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index daf809a9b88e..6bee65f3cfd3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -221,12 +221,17 @@ int main(void) OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXSLB, paca_struct, exslb); OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_PSERIES OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); +#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); +#endif OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 00b215125d3e..17c8b99680f2 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu) if (i == cpu) continue; - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { barrier(); if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) break; diff --git 
a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index a61151a6ea5e..6eca15f25c73 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -392,19 +392,20 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ - ld r13,0(r13) /* Get base vaddr of paca array */ #ifndef CONFIG_SMP - addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ b kexec_wait /* wait for next kernel if !SMP */ #else + LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */ + ld r8,0(r8) /* Get base vaddr of array */ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ -1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ +1: + sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ + ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */ + lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f - addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 cmpw r5,r7 /* Check if more pacas exist */ blt 1b @@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ - ld r4,0(r4) /* Get base vaddr of paca array */ - mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r4 /* for this processor. 
*/ + LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca_ptrs array */ + sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */ + ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ /* Mark interrupts soft and hard disabled (they might be enabled diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 49d34d7271e7..1044bf15d5ed 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state) * are correctly onlined. If somehow we start a CPU on boot with RTAS * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in * time, the boot CPU will timeout. If it does eventually execute - * stuff, the secondary will start up (paca[].cpu_start was written) and - * get into a peculiar state. If the platform supports - * smp_ops->take_timebase(), the secondary CPU will probably be spinning - * in there. If not (i.e. pseries), the secondary will continue on and - * try to online itself/idle/etc. If it survives that, we need to find - * these possible-but-not-online-but-should-be CPUs and chaperone them - * into kexec_smp_wait(). + * stuff, the secondary will start up (paca_ptrs[]->cpu_start was + * written) and get into a peculiar state. + * If the platform supports smp_ops->take_timebase(), the secondary CPU + * will probably be spinning in there. If not (i.e. pseries), the + * secondary will continue on and try to online itself/idle/etc. If it + * survives that, we need to find these + * possible-but-not-online-but-should-be CPUs and chaperone them into + * kexec_smp_wait(). 
*/ for_each_online_cpu(i) { if (i == my_cpu) continue; - while (paca[i].kexec_state < wait_state) { + while (paca_ptrs[i]->kexec_state < wait_state) { barrier(); if (i != notified) { printk(KERN_INFO "kexec: waiting for cpu %d " "(physical %d) to enter %i state\n", - i, paca[i].hw_cpu_id, wait_state); + i, paca_ptrs[i]->hw_cpu_id, wait_state); notified = i; } } @@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image) kexec_stack.thread_info.cpu = current_thread_info()->cpu; /* We need a static PACA, too; copy this CPU's PACA over and switch to - * it. Also poison per_cpu_offset to catch anyone using non-static - * data. + * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using + * non-static data. */ memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; - paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - - kexec_paca.paca_index; +#ifdef CONFIG_PPC_PSERIES + kexec_paca.lppaca_ptr = NULL; +#endif + paca_ptrs[kexec_paca.paca_index] = &kexec_paca; + setup_paca(&kexec_paca); - /* XXX: If anyone does 'dynamic lppacas' this will also need to be - * switched to a static version! + /* + * The lppaca should be unregistered at this point so the HV won't + * touch it. In the case of a crash, none of the lppacas are + * unregistered so there is not much we can do about it here. 
*/ + /* * On Book3S, the copy must happen with the MMU off if we are either * using Radix page tables or we are not in an LPAR since we can diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 2fd563d05831..0ee3e6d50f28 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -20,116 +20,105 @@ #include "setup.h" -#ifdef CONFIG_PPC_BOOK3S +#ifndef CONFIG_SMP +#define boot_cpuid 0 +#endif + +static void *__init alloc_paca_data(unsigned long size, unsigned long align, + unsigned long limit, int cpu) +{ + unsigned long pa; + int nid; + + /* + * boot_cpuid paca is allocated very early before cpu_to_node is up. + * Set bottom-up mode, because the boot CPU should be on node-0, + * which will put its paca in the right place. + */ + if (cpu == boot_cpuid) { + nid = -1; + memblock_set_bottom_up(true); + } else { + nid = early_cpu_to_node(cpu); + } + + pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE); + if (!pa) { + pa = memblock_alloc_base(size, align, limit); + if (!pa) + panic("cannot allocate paca data"); + } + + if (cpu == boot_cpuid) + memblock_set_bottom_up(false); + + return __va(pa); +} + +#ifdef CONFIG_PPC_PSERIES /* - * The structure which the hypervisor knows about - this structure - * should not cross a page boundary. The vpa_init/register_vpa call - * is now known to fail if the lppaca structure crosses a page - * boundary. The lppaca is also used on POWER5 pSeries boxes. - * The lppaca is 640 bytes long, and cannot readily - * change since the hypervisor knows its layout, so a 1kB alignment - * will suffice to ensure that it doesn't cross a page boundary. + * See asm/lppaca.h for more detail. + * + * lppaca structures must be 1kB in size, L1 cache line aligned, + * and not cross a 4kB boundary. A 1kB size and 1kB alignment will satisfy + * these requirements. */ -struct lppaca lppaca[] = { - [0 ... 
(NR_LPPACAS-1)] = { +static inline void init_lppaca(struct lppaca *lppaca) +{ + BUILD_BUG_ON(sizeof(struct lppaca) != 640); + + *lppaca = (struct lppaca) { .desc = cpu_to_be32(0xd397d781), /* "LpPa" */ - .size = cpu_to_be16(sizeof(struct lppaca)), + .size = cpu_to_be16(0x400), .fpregs_in_use = 1, .slb_count = cpu_to_be16(64), .vmxregs_in_use = 0, - .page_ins = 0, - }, + .page_ins = 0, }; }; -static struct lppaca *extra_lppacas; -static long __initdata lppaca_size; - -static void __init allocate_lppacas(int nr_cpus, unsigned long limit) -{ - if (nr_cpus <= NR_LPPACAS) - return; - - lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * - (nr_cpus - NR_LPPACAS)); - extra_lppacas = __va(memblock_alloc_base(lppaca_size, - PAGE_SIZE, limit)); -} - -static struct lppaca * __init new_lppaca(int cpu) +static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) { struct lppaca *lp; + size_t size = 0x400; - if (cpu < NR_LPPACAS) - return &lppaca[cpu]; + BUILD_BUG_ON(size < sizeof(struct lppaca)); - lp = extra_lppacas + (cpu - NR_LPPACAS); - *lp = lppaca[0]; + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; + + lp = alloc_paca_data(size, 0x400, limit, cpu); + init_lppaca(lp); return lp; } - -static void __init free_lppacas(void) -{ - long new_size = 0, nr; - - if (!lppaca_size) - return; - nr = num_possible_cpus() - NR_LPPACAS; - if (nr > 0) - new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); - if (new_size >= lppaca_size) - return; - - memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); - lppaca_size = new_size; -} - -#else - -static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } -static inline void free_lppacas(void) { } - #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3S_64 /* - * 3 persistent SLBs are registered here. The buffer will be zero + * 3 persistent SLBs are allocated here. The buffer will be zero * initially, hence will all be invaild until we actually write them. 
* * If you make the number of persistent SLB entries dynamic, please also * update PR KVM to flush and restore them accordingly. */ -static struct slb_shadow * __initdata slb_shadow; - -static void __init allocate_slb_shadows(int nr_cpus, int limit) -{ - int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); - - if (early_radix_enabled()) - return; - - slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); - memset(slb_shadow, 0, size); -} - -static struct slb_shadow * __init init_slb_shadow(int cpu) +static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) { struct slb_shadow *s; - if (early_radix_enabled()) - return NULL; + if (cpu != boot_cpuid) { + /* + * Boot CPU comes here before early_radix_enabled + * is parsed (e.g., for disable_radix). So allocate + * always and this will be fixed up in free_unused_pacas. + */ + if (early_radix_enabled()) + return NULL; + } - s = &slb_shadow[cpu]; - - /* - * When we come through here to initialise boot_paca, the slb_shadow - * buffers are not allocated yet. That's OK, we'll get one later in - * boot, but make sure we don't corrupt memory at 0. - */ - if (!slb_shadow) - return NULL; + s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu); + memset(s, 0, sizeof(*s)); s->persistent = cpu_to_be32(SLB_NUM_BOLTED); s->buffer_length = cpu_to_be32(sizeof(*s)); @@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu) return s; } -#else /* !CONFIG_PPC_BOOK3S_64 */ - -static void __init allocate_slb_shadows(int nr_cpus, int limit) { } - #endif /* CONFIG_PPC_BOOK3S_64 */ /* The Paca is an array with one entry per processor. Each contains an @@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { } * processors. The processor VPD array needs one entry per physical * processor (not thread). 
*/ -struct paca_struct *paca; -EXPORT_SYMBOL(paca); +struct paca_struct **paca_ptrs __read_mostly; +EXPORT_SYMBOL(paca_ptrs); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { -#ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = new_lppaca(cpu); -#else +#ifdef CONFIG_PPC_PSERIES + new_paca->lppaca_ptr = NULL; +#endif +#ifdef CONFIG_PPC_BOOK3E new_paca->kernel_pgd = swapper_pg_dir; #endif new_paca->lock_token = 0x8000; @@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; #ifdef CONFIG_PPC_BOOK3S_64 - new_paca->slb_shadow_ptr = init_slb_shadow(cpu); + new_paca->slb_shadow_ptr = NULL; #endif #ifdef CONFIG_PPC_BOOK3E @@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca) } -static int __initdata paca_size; +static int __initdata paca_nr_cpu_ids; +static int __initdata paca_ptrs_size; +static int __initdata paca_struct_size; -void __init allocate_pacas(void) +void __init allocate_paca_ptrs(void) +{ + paca_nr_cpu_ids = nr_cpu_ids; + + paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; + paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0)); + memset(paca_ptrs, 0x88, paca_ptrs_size); +} + +void __init allocate_paca(int cpu) { u64 limit; - int cpu; + struct paca_struct *paca; + + BUG_ON(cpu >= paca_nr_cpu_ids); #ifdef CONFIG_PPC_BOOK3S_64 /* @@ -220,40 +219,44 @@ void __init allocate_pacas(void) limit = ppc64_rma_size; #endif - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); + paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES, + limit, cpu); + paca_ptrs[cpu] = paca; + memset(paca, 0, sizeof(struct paca_struct)); - paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); - memset(paca, 0, paca_size); - - printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n", - paca_size, nr_cpu_ids, paca); - - allocate_lppacas(nr_cpu_ids, limit); - - allocate_slb_shadows(nr_cpu_ids, limit); - - /* 
Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - initialise_paca(&paca[cpu], cpu); + initialise_paca(paca, cpu); +#ifdef CONFIG_PPC_PSERIES + paca->lppaca_ptr = new_lppaca(cpu, limit); +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); +#endif + paca_struct_size += sizeof(struct paca_struct); } void __init free_unused_pacas(void) { - int new_size; + int new_ptrs_size; - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); + new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; + if (new_ptrs_size < paca_ptrs_size) + memblock_free(__pa(paca_ptrs) + new_ptrs_size, + paca_ptrs_size - new_ptrs_size); - if (new_size >= paca_size) - return; + paca_nr_cpu_ids = nr_cpu_ids; + paca_ptrs_size = new_ptrs_size; - memblock_free(__pa(paca) + new_size, paca_size - new_size); +#ifdef CONFIG_PPC_BOOK3S_64 + if (early_radix_enabled()) { + /* Ugly fixup, see new_slb_shadow() */ + memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), + sizeof(struct slb_shadow)); + paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL; + } +#endif - printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", - paca_size - new_size); - - paca_size = new_size; - - free_lppacas(); + printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n", + paca_ptrs_size + paca_struct_size, nr_cpu_ids); } void copy_mm_to_paca(struct mm_struct *mm) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 330c65f04820..9dbed488aba1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, DBG("boot cpu: logical %d physical %d\n", found, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); /* * PAPR defines "logical" PVR values for cpus that @@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, 
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; + allocate_paca(boot_cpuid); #endif + set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); return 0; } @@ -744,7 +745,7 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_pacas(); + allocate_paca_ptrs(); DBG("Scanning CPUs ...\n"); @@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id); bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { +#ifdef CONFIG_SMP + /* + * Early firmware scanning must use this rather than + * get_hard_smp_processor_id because we don't have pacas allocated + * until memory topology is discovered. + */ + if (cpu_to_phys_id != NULL) + return (int)phys_id == cpu_to_phys_id[cpu]; +#endif + return (int)phys_id == get_hard_smp_processor_id(cpu); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a6002f9449b1..56f7a2b793e0 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc) } +u32 *cpu_to_phys_id = NULL; + /** * setup_cpu_maps - initialize the following cpu maps: * cpu_possible_mask @@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); + cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32), + __alignof__(u32))); + memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); + for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; @@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "reg", &len); if (!intserv) { cpu_be = cpu_to_be32(cpu); + /* XXX: what is this? uninitialized?? 
*/ intserv = &cpu_be; /* assume logical == phys */ len = 4; } @@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void) "enable-method", "spin-table"); set_cpu_present(cpu, avail); - set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); + cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]); cpu++; } @@ -835,6 +842,28 @@ static __init void print_system_info(void) pr_info("-----------------------------------------------------\n"); } +#ifdef CONFIG_SMP +/* + * Allocate a paca for every possible CPU except the one we are running on, + * record each CPU's hardware id from cpu_to_phys_id[], then free that + * temporary table. Only called during boot, hence __init. + */ +static void __init smp_setup_pacas(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + allocate_paca(cpu); + set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); + } + + memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); + cpu_to_phys_id = NULL; +} +#endif + /* * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. @@ -888,6 +917,9 @@ void __init setup_arch(char **cmdline_p) /* Check the SMT related command line arguments (ppc64). */ check_smt_enabled(); + /* Parse memory topology */ + mem_topology_setup(); + /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); @@ -899,6 +931,7 @@ void __init setup_arch(char **cmdline_p) * so smp_release_cpus() does nothing for them. 
*/ #ifdef CONFIG_SMP + smp_setup_pacas(); smp_release_cpus(); #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 3fc11e30308f..d144df54ad40 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -45,14 +45,11 @@ void emergency_stack_init(void); static inline void emergency_stack_init(void) { }; #endif -#ifdef CONFIG_PPC64 -void record_spr_defaults(void); -#else -static inline void record_spr_defaults(void) { }; -#endif - #ifdef CONFIG_PPC64 u64 ppc64_bolted_size(void); + +/* Default SPR values from firmware/kexec */ +extern unsigned long spr_default_dscr; #endif /* diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7f7621668613..66f2b6299c40 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void) if (cpu_first_thread_sibling(boot_cpuid) == first) first = boot_cpuid; - paca[cpu].tcd_ptr = &paca[first].tcd; + paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd; /* * If we have threads, we need either tlbsrx. @@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void) get_paca()->kernel_msr = MSR_KERNEL; } +unsigned long spr_default_dscr = 0; + +void __init record_spr_defaults(void) +{ + if (early_cpu_has_feature(CPU_FTR_DSCR)) + spr_default_dscr = mfspr(SPRN_DSCR); +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. 
*/ - setup_paca(&paca[boot_cpuid]); + if (boot_cpuid != 0) { + /* Poison paca_ptrs[0] again if it's not the boot cpu */ + memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); + } + setup_paca(paca_ptrs[boot_cpuid]); fixup_boot_paca(); /* @@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void) #endif } +static void *__init alloc_stack(unsigned long limit, int cpu) +{ + unsigned long pa; + + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, + early_cpu_to_node(cpu), MEMBLOCK_NONE); + if (!pa) { + pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + if (!pa) + panic("cannot allocate stacks"); + } + + return __va(pa); +} + void __init irqstack_early_init(void) { u64 limit = ppc64_bolted_size(); @@ -610,12 +637,8 @@ void __init irqstack_early_init(void) * accessed in realmode. */ for_each_possible_cpu(i) { - softirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc_base(THREAD_SIZE, - THREAD_SIZE, limit)); - hardirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc_base(THREAD_SIZE, - THREAD_SIZE, limit)); + softirq_ctx[i] = alloc_stack(limit, i); + hardirq_ctx[i] = alloc_stack(limit, i); } } @@ -623,20 +646,21 @@ void __init irqstack_early_init(void) void __init exc_lvl_early_init(void) { unsigned int i; - unsigned long sp; for_each_possible_cpu(i) { - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - critirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].crit_kstack = __va(sp + THREAD_SIZE); + void *sp; - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - dbgirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].dbg_kstack = __va(sp + THREAD_SIZE); + sp = alloc_stack(ULONG_MAX, i); + critirq_ctx[i] = sp; + paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE; - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - mcheckirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].mc_kstack = __va(sp + THREAD_SIZE); + sp = alloc_stack(ULONG_MAX, i); + dbgirq_ctx[i] = sp; + paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE; + + sp = alloc_stack(ULONG_MAX, i); + 
mcheckirq_ctx[i] = sp; + paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE; } if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) @@ -690,23 +714,24 @@ void __init emergency_stack_init(void) for_each_possible_cpu(i) { struct thread_info *ti; - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } } @@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void) delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; - paca[cpu].data_offset = __per_cpu_offset[cpu]; + paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu]; } } #endif @@ -876,8 +901,9 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_size = l1d_size; + struct paca_struct *paca = paca_ptrs[cpu]; + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; + paca->l1d_flush_size = l1d_size; } } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 
bbe7634b3a43..cfc08b099c49 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr) * cpu_start field to become non-zero After we set cpu_start, * the processor will continue on to secondary_start */ - if (!paca[nr].cpu_start) { - paca[nr].cpu_start = 1; + if (!paca_ptrs[nr]->cpu_start) { + paca_ptrs[nr]->cpu_start = 1; smp_mb(); return 0; } @@ -657,7 +657,7 @@ void smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); #ifdef CONFIG_PPC64 - paca[boot_cpuid].__current = current; + paca_ptrs[boot_cpuid]->__current = current; #endif set_numa_node(numa_cpu_lookup_table[boot_cpuid]); current_set[boot_cpuid] = task_thread_info(current); @@ -748,8 +748,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) struct thread_info *ti = task_thread_info(idle); #ifdef CONFIG_PPC64 - paca[cpu].__current = idle; - paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; + paca_ptrs[cpu]->__current = idle; + paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; #endif ti->cpu = cpu; secondary_ti = current_set[cpu] = ti; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 04d0bbd7a1dd..755dc98a57ae 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -20,6 +20,7 @@ #include #include "cacheinfo.h" +#include "setup.h" #ifdef CONFIG_PPC64 #include @@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600, static void sysfs_create_dscr_default(void) { - int err = 0; - if (cpu_has_feature(CPU_FTR_DSCR)) - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); -} - -void __init record_spr_defaults(void) -{ - int cpu; - if (cpu_has_feature(CPU_FTR_DSCR)) { - dscr_default = mfspr(SPRN_DSCR); - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - paca[cpu].dscr_default = dscr_default; + int err = 0; + int cpu; + + dscr_default = spr_default_dscr; + for_each_possible_cpu(cpu) + 
paca_ptrs[cpu]->dscr_default = dscr_default; + + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } } + #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 55c1022733c3..1e1211c66b26 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -170,7 +170,7 @@ static bool kvmppc_ipi_thread(int cpu) #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (cpu >= 0 && cpu < nr_cpu_ids) { - if (paca[cpu].kvm_hstate.xics_phys) { + if (paca_ptrs[cpu]->kvm_hstate.xics_phys) { xics_wake_cpu(cpu); return true; } @@ -498,7 +498,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, * use 640 bytes of the structure though, so we should accept * clients that set a size of 640. */ - if (len < 640) + BUILD_BUG_ON(sizeof(struct lppaca) != 640); + if (len < sizeof(struct lppaca)) break; vpap = &tvcpu->arch.vpa; err = 0; @@ -2157,7 +2158,7 @@ static int kvmppc_grab_hwthread(int cpu) struct paca_struct *tpaca; long timeout = 10000; - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; /* Ensure the thread won't go into the kernel if it wakes */ tpaca->kvm_hstate.kvm_vcpu = NULL; @@ -2190,7 +2191,7 @@ static void kvmppc_release_hwthread(int cpu) { struct paca_struct *tpaca; - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; tpaca->kvm_hstate.kvm_vcore = NULL; @@ -2256,7 +2257,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) vcpu->arch.thread_cpu = cpu; cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.ptid = cpu - vc->pcpu; tpaca->kvm_hstate.fake_suspend = 0; @@ -2282,7 +2283,7 @@ static void kvmppc_wait_for_nap(int n_threads) * for any threads that still have a non-NULL vcore ptr. 
*/ for (i = 1; i < n_threads; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcore) + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) break; if (i == n_threads) { HMT_medium(); @@ -2292,7 +2293,7 @@ static void kvmppc_wait_for_nap(int n_threads) } HMT_medium(); for (i = 1; i < n_threads; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcore) + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -2824,9 +2825,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } for (thr = 0; thr < controlled_threads; ++thr) { - paca[pcpu + thr].kvm_hstate.tid = thr; - paca[pcpu + thr].kvm_hstate.napping = 0; - paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; + struct paca_struct *paca = paca_ptrs[pcpu + thr]; + + paca->kvm_hstate.tid = thr; + paca->kvm_hstate.napping = 0; + paca->kvm_hstate.kvm_split_mode = sip; } /* Initiate micro-threading (split-core) on POWER8 if required */ @@ -2943,7 +2946,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } else if (hpt_on_radix) { /* Wait for all threads to have seen final sync */ for (thr = 1; thr < controlled_threads; ++thr) { - while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) { + struct paca_struct *paca = paca_ptrs[pcpu + thr]; + + while (paca->kvm_hstate.kvm_split_mode) { HMT_low(); barrier(); } @@ -4405,7 +4410,7 @@ static int kvm_init_subcore_bitmap(void) int node = cpu_to_node(first_cpu); /* Ignore if it is already allocated. 
*/ - if (paca[first_cpu].sibling_subcore_state) + if (paca_ptrs[first_cpu]->sibling_subcore_state) continue; sibling_subcore_state = @@ -4420,7 +4425,8 @@ static int kvm_init_subcore_bitmap(void) for (j = 0; j < threads_per_core; j++) { int cpu = first_cpu + j; - paca[cpu].sibling_subcore_state = sibling_subcore_state; + paca_ptrs[cpu]->sibling_subcore_state = + sibling_subcore_state; } } return 0; @@ -4447,7 +4453,7 @@ static int kvmppc_book3s_init_hv(void) /* * We need a way of accessing the XICS interrupt controller, - * either directly, via paca[cpu].kvm_hstate.xics_phys, or + * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or * indirectly, via OPAL. */ #ifdef CONFIG_SMP diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 49a2c7825e04..de18299f92b7 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu) return; /* Else poke the target with an IPI */ - xics_phys = paca[cpu].kvm_hstate.xics_phys; + xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys; if (xics_phys) __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); else diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index dc54373c8780..0e8493033288 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r5, 0 mtspr SPRN_MMCRA, r5 isync - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ - lbz r5, LPPACA_PMCINUSE(r3) + lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? 
*/ cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index af1772169eff..95c616f2da22 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_SPRG_VDSO_WRITE,r3 /* Reload the host's PMU registers */ - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ - lbz r4, LPPACA_PMCINUSE(r3) + lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 beq 23f /* skip if not */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4180b89b8922..7587a2ec8874 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -781,7 +781,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) } } -int hash__create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 85245ef97e72..e2f5025b03b0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -117,7 +117,7 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end) +int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) { return -ENODEV; } @@ -137,7 +137,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size); + rc = create_section_mapping(start, start + size, nid); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); @@ -212,7 +212,7 @@ 
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init initmem_init(void) +void __init mem_topology_setup(void) { max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = MEMORY_START >> PAGE_SHIFT; @@ -224,7 +224,10 @@ void __init initmem_init(void) * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); +} +void __init initmem_init(void) +{ /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); sparse_init(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index edd8d0bc9364..57a5029b4521 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -831,18 +831,13 @@ out: of_node_put(rtas); } -void __init initmem_init(void) +void __init mem_topology_setup(void) { - int nid, cpu; - - max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - max_pfn = max_low_pfn; + int cpu; if (parse_numa_properties()) setup_nonnuma(); - memblock_dump_all(); - /* * Modify the set of possible NUMA nodes to reflect information * available about the set of online nodes, and the set of nodes @@ -853,6 +848,23 @@ void __init initmem_init(void) find_possible_nodes(); + setup_node_to_cpumask_map(); + + reset_numa_cpu_lookup_table(); + + for_each_present_cpu(cpu) + numa_setup_cpu(cpu); +} + +void __init initmem_init(void) +{ + int nid; + + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; @@ -863,10 +875,6 @@ void __init initmem_init(void) sparse_init(); - setup_node_to_cpumask_map(); - - reset_numa_cpu_lookup_table(); - /* * We need the numa_cpu_lookup_table to be accurate for all CPUs, * even before we online them, so that we can use cpu_to_{node,mem} @@ -876,8 +884,6 @@ void __init initmem_init(void) */ cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, 
"powerpc/numa:prepare", ppc_numa_cpu_prepare, ppc_numa_cpu_dead); - for_each_present_cpu(cpu) - numa_setup_cpu(cpu); } static int __init early_numa(char *p) @@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void) for_each_possible_cpu(cpu) { int i; u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; for (i = 0; i < distance_ref_points_depth; i++) counts[i] = hypervisor_counts[i]; @@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void) for_each_possible_cpu(cpu) { int i, changed = 0; u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; for (i = 0; i < distance_ref_points_depth; i++) { if (hypervisor_counts[i] != counts[i]) { diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index bd6ca74acf9e..518518fb7c45 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -155,12 +155,12 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int __meminit create_section_mapping(unsigned long start, unsigned long end) +int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) { if (radix_enabled()) - return radix__create_section_mapping(start, end); + return radix__create_section_mapping(start, end, nid); - return hash__create_section_mapping(start, end); + return hash__create_section_mapping(start, end, nid); } int __meminit remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ab9db0afd2c8..7095384344b4 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz return 
0; } -static __ref void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size, int nid, + unsigned long region_start, unsigned long region_end) { + unsigned long pa = 0; void *pt; - pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE)); + if (region_start || region_end) /* has region hint */ + pa = memblock_alloc_range(size, size, region_start, region_end, + MEMBLOCK_NONE); + else if (nid != -1) /* has node hint */ + pa = memblock_alloc_base_nid(size, size, + MEMBLOCK_ALLOC_ANYWHERE, + nid, MEMBLOCK_NONE); + + if (!pa) + pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); + + BUG_ON(!pa); + + pt = __va(pa); memset(pt, 0, size); return pt; } -int radix__map_kernel_page(unsigned long ea, unsigned long pa, +static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, - unsigned int map_page_size) + unsigned int map_page_size, + int nid, + unsigned long region_start, unsigned long region_end) { + unsigned long pfn = pa >> PAGE_SHIFT; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(ea); + if (pgd_none(*pgdp)) { + pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, + region_start, region_end); + pgd_populate(&init_mm, pgdp, pudp); + } + pudp = pud_offset(pgdp, ea); + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; + } + if (pud_none(*pudp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, + region_start, region_end); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; + } + if (!pmd_present(*pmdp)) { + ptep = early_alloc_pgtable(PAGE_SIZE, nid, + region_start, region_end); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + +set_the_pte: + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); + smp_wmb(); + return 0; +} + +/* + * nid, region_start, and region_end are hints to try 
to place the page + * table memory in the same node or region. + */ +static int __map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size, + int nid, + unsigned long region_start, unsigned long region_end) +{ + unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; @@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, * Make sure task size is correct as per the max adddr */ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); - if (slab_is_available()) { - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - ptep = pte_alloc_kernel(pmdp, ea); - if (!ptep) - return -ENOMEM; - } else { - pgdp = pgd_offset_k(ea); - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - BUG_ON(pudp == NULL); - pgd_populate(&init_mm, pgdp, pudp); - } - pudp = pud_offset(pgdp, ea); - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - BUG_ON(pmdp == NULL); - pud_populate(&init_mm, pudp, pmdp); - } - pmdp = pmd_offset(pudp, ea); - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); - BUG_ON(ptep == NULL); - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - ptep = pte_offset_kernel(pmdp, ea); + + if (unlikely(!slab_is_available())) + return early_map_kernel_page(ea, pa, flags, map_page_size, + nid, region_start, region_end); + + /* + * Should make page table allocation functions be able to take a + * node, so we can place kernel page tables on the right nodes after + * boot. 
+ */ + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; } + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; + } + ptep = pte_alloc_kernel(pmdp, ea); + if (!ptep) + return -ENOMEM; set_the_pte: - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags)); + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); smp_wmb(); return 0; } +int radix__map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size) +{ + return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0); +} + #ifdef CONFIG_STRICT_KERNEL_RWX void radix__change_memory_range(unsigned long start, unsigned long end, unsigned long clear) @@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start, } static int __meminit create_physical_mapping(unsigned long start, - unsigned long end) + unsigned long end, + int nid) { unsigned long vaddr, addr, mapping_size = 0; pgprot_t prot; @@ -267,7 +323,7 @@ retry: else prot = PAGE_KERNEL; - rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); + rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); if (rc) return rc; } @@ -276,7 +332,7 @@ retry: return 0; } -static void __init radix_init_pgtable(void) +void __init radix_init_pgtable(void) { unsigned long rts_field; struct memblock_region *reg; @@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void) /* * Create the linear mapping, using standard page size for now */ - for_each_memblock(memory, reg) + for_each_memblock(memory, reg) { + /* + * The memblock allocator is up at this point, so the + * page tables will be allocated within the range. No + * need for a node (which we don't have yet). 
+ */ WARN_ON(create_physical_mapping(reg->base, - reg->base + reg->size)); + reg->base + reg->size, + -1)); + } /* Find out how many PID bits are supported */ if (cpu_has_feature(CPU_FTR_HVMODE)) { @@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void) * host. */ BUG_ON(PRTB_SIZE_SHIFT > 36); - process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); + process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0); /* * Fill in the process table. */ @@ -705,8 +768,8 @@ static int __meminit stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(params->aligned_start, params->start); - create_physical_mapping(params->end, params->aligned_end); + create_physical_mapping(params->aligned_start, params->start, -1); + create_physical_mapping(params->end, params->aligned_end, -1); spin_lock(&init_mm.page_table_lock); return 0; } @@ -863,9 +926,9 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __meminit radix__create_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { - return create_physical_mapping(start, end); + return create_physical_mapping(start, end, nid); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) @@ -876,14 +939,25 @@ int __meminit radix__remove_section_mapping(unsigned long start, unsigned long e #endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_SPARSEMEM_VMEMMAP +static int __map_kernel_page_nid(unsigned long ea, unsigned long pa, + pgprot_t flags, unsigned int map_page_size, + int nid) +{ + return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0); +} + int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) { /* Create a PTE encoding */ unsigned long flags = 
_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW; + int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); + int ret; + + ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); + BUG_ON(ret); - BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size)); return 0; } diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 291eab4ed7cd..a8b178dd2e82 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { if (sib == cpu) continue; - if (paca[sib].kvm_hstate.kvm_vcpu) + if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) flush = true; } if (flush) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index f51fd35f4618..7e966f4cf19a 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -147,7 +147,7 @@ static void qoriq_cpu_kill(unsigned int cpu) for (i = 0; i < 500; i++) { if (is_cpu_dead(cpu)) { #ifdef CONFIG_PPC64 - paca[cpu].cpu_start = 0; + paca_ptrs[cpu]->cpu_start = 0; #endif return; } @@ -328,7 +328,7 @@ static int smp_85xx_kick_cpu(int nr) return ret; done: - paca[nr].cpu_start = 1; + paca_ptrs[nr]->cpu_start = 1; generic_set_cpu_up(nr); return ret; @@ -409,14 +409,14 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) } if (disable_threadbit) { - while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) { + while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) { barrier(); now = mftb(); if (!notified && now - start > 1000000) { pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n", __func__, smp_processor_id(), disable_cpu, - paca[disable_cpu].kexec_state); + paca_ptrs[disable_cpu]->kexec_state); notified = true; } } diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index f84d52a2db40..1aeac5761e0b 100644 --- 
a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -83,7 +83,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) pcpu = get_hard_smp_processor_id(lcpu); /* Fixup atomic count: it exited inside IRQ handler. */ - task_thread_info(paca[lcpu].__current)->preempt_count = 0; + task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; /* * If the RTAS start-cpu token does not exist then presume the @@ -126,7 +126,7 @@ static int smp_cell_kick_cpu(int nr) * cpu_start field to become non-zero After we set cpu_start, * the processor will continue on to secondary_start */ - paca[nr].cpu_start = 1; + paca_ptrs[nr]->cpu_start = 1; return 0; } diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 99a760eae964..d9e366bb23da 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -81,7 +81,7 @@ static int pnv_save_sprs_for_deep_states(void) for_each_possible_cpu(cpu) { uint64_t pir = get_hard_smp_processor_id(cpu); - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); if (rc != 0) @@ -174,12 +174,12 @@ static void pnv_alloc_idle_core_states(void) for (j = 0; j < threads_per_core; j++) { int cpu = first_cpu + j; - paca[cpu].core_idle_state_ptr = core_idle_state; - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; - paca[cpu].thread_mask = 1 << j; + paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; + paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; + paca_ptrs[cpu]->thread_mask = 1 << j; if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) continue; - paca[cpu].thread_sibling_pacas = + paca_ptrs[cpu]->thread_sibling_pacas = kmalloc_node(paca_ptr_array_size, GFP_KERNEL, node); } @@ -405,22 +405,20 @@ void power9_idle(void) void pnv_power9_force_smt4_catch(void) { int cpu, cpu0, thr; - struct paca_struct *tpaca; int awake_threads = 1; /* this thread is awake */ int 
poke_threads = 0; int need_awake = threads_per_core; cpu = smp_processor_id(); cpu0 = cpu & ~(threads_per_core - 1); - tpaca = &paca[cpu0]; for (thr = 0; thr < threads_per_core; ++thr) { if (cpu != cpu0 + thr) - atomic_inc(&tpaca[thr].dont_stop); + atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); } /* order setting dont_stop vs testing requested_psscr */ mb(); for (thr = 0; thr < threads_per_core; ++thr) { - if (!tpaca[thr].requested_psscr) + if (!paca_ptrs[cpu0+thr]->requested_psscr) ++awake_threads; else poke_threads |= (1 << thr); @@ -433,14 +431,14 @@ void pnv_power9_force_smt4_catch(void) if (poke_threads & (1 << thr)) { ppc_msgsnd_sync(); ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, - tpaca[thr].hw_cpu_id); + paca_ptrs[cpu0+thr]->hw_cpu_id); } } /* now spin until at least 3 threads are awake */ do { for (thr = 0; thr < threads_per_core; ++thr) { if ((poke_threads & (1 << thr)) && - !tpaca[thr].requested_psscr) { + !paca_ptrs[cpu0+thr]->requested_psscr) { ++awake_threads; poke_threads &= ~(1 << thr); } @@ -453,16 +451,14 @@ EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); void pnv_power9_force_smt4_release(void) { int cpu, cpu0, thr; - struct paca_struct *tpaca; cpu = smp_processor_id(); cpu0 = cpu & ~(threads_per_core - 1); - tpaca = &paca[cpu0]; /* clear all the dont_stop flags */ for (thr = 0; thr < threads_per_core; ++thr) { if (cpu != cpu0 + thr) - atomic_dec(&tpaca[thr].dont_stop); + atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); } } EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); @@ -830,7 +826,8 @@ static int __init pnv_init_idle_states(void) for (i = 0; i < threads_per_core; i++) { int j = base_cpu + i; - paca[j].thread_sibling_pacas[idx] = &paca[cpu]; + paca_ptrs[j]->thread_sibling_pacas[idx] = + paca_ptrs[cpu]; } } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 7de050a3736b..5f963286232f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -293,7 +293,7 @@ static 
void pnv_kexec_wait_secondaries_down(void) if (i != notified) { printk(KERN_INFO "kexec: waiting for cpu %d " "(physical %d) to enter OPAL\n", - i, paca[i].hw_cpu_id); + i, paca_ptrs[i]->hw_cpu_id); notified = i; } @@ -305,7 +305,7 @@ static void pnv_kexec_wait_secondaries_down(void) if (timeout-- == 0) { printk(KERN_ERR "kexec: timed out waiting for " "cpu %d (physical %d) to enter OPAL\n", - i, paca[i].hw_cpu_id); + i, paca_ptrs[i]->hw_cpu_id); break; } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 9664c8461f03..19af6de6b6f0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -80,7 +80,7 @@ static int pnv_smp_kick_cpu(int nr) * If we already started or OPAL is not supported, we just * kick the CPU via the PACA */ - if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) + if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) goto kick; /* diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 596ae2e98040..45563004feda 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -280,7 +280,7 @@ void update_subcore_sibling_mask(void) int offset = (tid / threads_per_subcore) * threads_per_subcore; int mask = sibling_mask_first_cpu << offset; - paca[cpu].subcore_sibling_mask = mask; + paca_ptrs[cpu]->subcore_sibling_mask = mask; } } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 652d3e96b812..6ef77caf7bcf 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -234,7 +234,7 @@ static void pseries_cpu_die(unsigned int cpu) * done here. Change isolate state to Isolate and * change allocation-state to Unusable. 
*/ - paca[cpu].cpu_start = 0; + paca_ptrs[cpu]->cpu_start = 0; } /* diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index eeb13429d685..3fe126796975 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -23,7 +23,12 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) { - /* Don't risk a hypervisor call if we're crashing */ + /* + * Don't risk a hypervisor call if we're crashing + * XXX: Why? The hypervisor is not crashing. It might be better + * to at least attempt unregister to avoid the hypervisor stepping + * on our memory. + */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { int ret; int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 238b55fb8007..adb996ed51e1 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -99,7 +99,7 @@ void vpa_init(int cpu) * reports that. All SPLPAR support SLB shadow buffer. */ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { - addr = __pa(paca[cpu].slb_shadow_ptr); + addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr); ret = register_slb_shadow(hwcpu, addr); if (ret) pr_err("WARNING: SLB shadow buffer registration for " @@ -111,7 +111,7 @@ void vpa_init(int cpu) /* * Register dispatch trace log, if one has been allocated. 
*/ - pp = &paca[cpu]; + pp = paca_ptrs[cpu]; dtl = pp->dispatch_log; if (dtl) { pp->dtl_ridx = 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b11564f2a4c7..98bca8d9c9e0 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -247,7 +247,7 @@ static int alloc_dispatch_logs(void) return 0; for_each_possible_cpu(cpu) { - pp = &paca[cpu]; + pp = paca_ptrs[cpu]; dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); if (!dtl) { pr_warn("Failed to allocate dispatch trace log for cpu %d\n", diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 66b6f119d599..3df46123cce3 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -110,7 +110,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) } /* Fixup atomic count: it exited inside IRQ handler. */ - task_thread_info(paca[lcpu].__current)->preempt_count = 0; + task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; #ifdef CONFIG_HOTPLUG_CPU if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) goto out; @@ -165,7 +165,7 @@ static int smp_pSeries_kick_cpu(int nr) * cpu_start field to become non-zero After we set cpu_start, * the processor will continue on to secondary_start */ - paca[nr].cpu_start = 1; + paca_ptrs[nr]->cpu_start = 1; #ifdef CONFIG_HOTPLUG_CPU set_preferred_offline_state(nr, CPU_STATE_ONLINE); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 73067805300a..1d4e0ef658d3 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask) int i; u32 mask = 0; - for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1) + for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1) mask |= (cpumask & 1) << get_hard_smp_processor_id(i); return mask; } diff --git a/arch/powerpc/sysdev/xics/icp-native.c 
b/arch/powerpc/sysdev/xics/icp-native.c index 1459f4e8b698..37bfbc54aacb 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -164,7 +164,7 @@ void icp_native_cause_ipi_rm(int cpu) * Just like the cause_ipi functions, it is required to * include a full barrier before causing the IPI. */ - xics_phys = paca[cpu].kvm_hstate.xics_phys; + xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys; mb(); __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 53918023622e..a0842f1ff72c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2341,7 +2341,7 @@ static void dump_one_paca(int cpu) catch_memory_errors = 1; sync(); - p = &paca[cpu]; + p = paca_ptrs[cpu]; printf("paca for cpu 0x%x @ %px:\n", cpu, p); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8be5077efb5f..4e1e3d0b002a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -319,6 +319,9 @@ static inline bool memblock_bottom_up(void) phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, ulong flags); +phys_addr_t memblock_alloc_base_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr, + int nid, ulong flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, diff --git a/mm/memblock.c b/mm/memblock.c index 5a9ca2a1751b..cea2af494da0 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1190,7 +1190,7 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, flags); } -static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, +phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr, int nid, ulong flags) {