2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* PowerPC64 Segment Translation Support.
|
|
|
|
*
|
|
|
|
* Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
|
|
|
|
* Copyright (c) 2001 Dave Engebretsen
|
|
|
|
*
|
|
|
|
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
2010-07-12 12:36:09 +08:00
|
|
|
#include <linux/memblock.h>
|
2008-02-14 08:56:49 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include <asm/paca.h>
|
|
|
|
#include <asm/cputable.h>
|
2008-02-14 08:56:49 +08:00
|
|
|
#include <asm/prom.h>
|
2005-07-28 02:44:19 +08:00
|
|
|
#include <asm/abs_addr.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-05-06 07:15:13 +08:00
|
|
|
/*
 * One entry of the segment table: a pair of doublewords.
 *
 * make_ste() fills esid_data with the ESID shifted by SID_SHIFT combined
 * with the STE_ESID_* flag bits (including the valid bit STE_ESID_V), and
 * vsid_data with the VSID shifted by STE_VSID_SHIFT.
 */
struct stab_entry {
	unsigned long esid_data;	/* ESID << SID_SHIFT | STE_ESID_* flags */
	unsigned long vsid_data;	/* VSID << STE_VSID_SHIFT */
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#define NR_STAB_CACHE_ENTRIES 8
|
2008-05-08 12:27:07 +08:00
|
|
|
static DEFINE_PER_CPU(long, stab_cache_ptr);
|
2009-06-24 14:13:45 +08:00
|
|
|
static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a segment table entry for the given esid/vsid pair.
|
|
|
|
*/
|
|
|
|
static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
|
|
|
|
{
|
|
|
|
unsigned long esid_data, vsid_data;
|
|
|
|
unsigned long entry, group, old_esid, castout_entry, i;
|
|
|
|
unsigned int global_entry;
|
|
|
|
struct stab_entry *ste, *castout_ste;
|
2005-12-06 00:24:33 +08:00
|
|
|
unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
vsid_data = vsid << STE_VSID_SHIFT;
|
|
|
|
esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V;
|
|
|
|
if (! kernel_segment)
|
|
|
|
esid_data |= STE_ESID_KS;
|
|
|
|
|
|
|
|
/* Search the primary group first. */
|
|
|
|
global_entry = (esid & 0x1f) << 3;
|
|
|
|
ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
|
|
|
|
|
|
|
|
/* Find an empty entry, if one exists. */
|
|
|
|
for (group = 0; group < 2; group++) {
|
|
|
|
for (entry = 0; entry < 8; entry++, ste++) {
|
|
|
|
if (!(ste->esid_data & STE_ESID_V)) {
|
|
|
|
ste->vsid_data = vsid_data;
|
2007-07-10 12:49:09 +08:00
|
|
|
eieio();
|
2005-04-17 06:20:36 +08:00
|
|
|
ste->esid_data = esid_data;
|
|
|
|
return (global_entry | entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Now search the secondary group. */
|
|
|
|
global_entry = ((~esid) & 0x1f) << 3;
|
|
|
|
ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Could not find empty entry, pick one with a round robin selection.
|
|
|
|
* Search all entries in the two groups.
|
|
|
|
*/
|
|
|
|
castout_entry = get_paca()->stab_rr;
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
if (castout_entry < 8) {
|
|
|
|
global_entry = (esid & 0x1f) << 3;
|
|
|
|
ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
|
|
|
|
castout_ste = ste + castout_entry;
|
|
|
|
} else {
|
|
|
|
global_entry = ((~esid) & 0x1f) << 3;
|
|
|
|
ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
|
|
|
|
castout_ste = ste + (castout_entry - 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Dont cast out the first kernel segment */
|
2005-12-06 00:24:33 +08:00
|
|
|
if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET)
|
2005-04-17 06:20:36 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
castout_entry = (castout_entry + 1) & 0xf;
|
|
|
|
}
|
|
|
|
|
|
|
|
get_paca()->stab_rr = (castout_entry + 1) & 0xf;
|
|
|
|
|
|
|
|
/* Modify the old entry to the new value. */
|
|
|
|
|
|
|
|
/* Force previous translations to complete. DRENG */
|
|
|
|
asm volatile("isync" : : : "memory");
|
|
|
|
|
|
|
|
old_esid = castout_ste->esid_data >> SID_SHIFT;
|
|
|
|
castout_ste->esid_data = 0; /* Invalidate old entry */
|
|
|
|
|
|
|
|
asm volatile("sync" : : : "memory"); /* Order update */
|
|
|
|
|
|
|
|
castout_ste->vsid_data = vsid_data;
|
2007-07-10 12:49:09 +08:00
|
|
|
eieio(); /* Order update */
|
2005-04-17 06:20:36 +08:00
|
|
|
castout_ste->esid_data = esid_data;
|
|
|
|
|
|
|
|
asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT));
|
|
|
|
/* Ensure completion of slbie */
|
|
|
|
asm volatile("sync" : : : "memory");
|
|
|
|
|
|
|
|
return (global_entry | (castout_entry & 0x7));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate a segment table entry for the given ea and mm
|
|
|
|
*/
|
|
|
|
static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
unsigned long vsid;
|
|
|
|
unsigned char stab_entry;
|
|
|
|
unsigned long offset;
|
|
|
|
|
|
|
|
/* Kernel or user address? */
|
2005-12-04 15:39:15 +08:00
|
|
|
if (is_kernel_addr(ea)) {
|
2007-10-11 18:37:10 +08:00
|
|
|
vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
|
2005-04-17 06:20:36 +08:00
|
|
|
} else {
|
|
|
|
if ((ea >= TASK_SIZE_USER64) || (! mm))
|
|
|
|
return 1;
|
|
|
|
|
2007-10-11 18:37:10 +08:00
|
|
|
vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid);
|
|
|
|
|
2005-12-04 15:39:15 +08:00
|
|
|
if (!is_kernel_addr(ea)) {
|
2005-04-17 06:20:36 +08:00
|
|
|
offset = __get_cpu_var(stab_cache_ptr);
|
|
|
|
if (offset < NR_STAB_CACHE_ENTRIES)
|
|
|
|
__get_cpu_var(stab_cache[offset++]) = stab_entry;
|
|
|
|
else
|
|
|
|
offset = NR_STAB_CACHE_ENTRIES+1;
|
|
|
|
__get_cpu_var(stab_cache_ptr) = offset;
|
|
|
|
|
|
|
|
/* Order update */
|
|
|
|
asm volatile("sync":::"memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ste_allocate(unsigned long ea)
|
|
|
|
{
|
|
|
|
return __ste_allocate(ea, current->mm);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do the segment table work for a context switch: flush all user
|
|
|
|
* entries from the table, then preload some probably useful entries
|
|
|
|
* for the new task
|
|
|
|
*/
|
|
|
|
void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr;
|
|
|
|
struct stab_entry *ste;
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
unsigned long offset;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long pc = KSTK_EIP(tsk);
|
|
|
|
unsigned long stack = KSTK_ESP(tsk);
|
|
|
|
unsigned long unmapped_base;
|
|
|
|
|
|
|
|
/* Force previous translations to complete. DRENG */
|
|
|
|
asm volatile("isync" : : : "memory");
|
|
|
|
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
/*
|
|
|
|
* We need interrupts hard-disabled here, not just soft-disabled,
|
|
|
|
* so that a PMU interrupt can't occur, which might try to access
|
|
|
|
* user memory (to get a stack trace) and possible cause an STAB miss
|
|
|
|
* which would update the stab_cache/stab_cache_ptr per-cpu variables.
|
|
|
|
*/
|
|
|
|
hard_irq_disable();
|
|
|
|
|
|
|
|
offset = __get_cpu_var(stab_cache_ptr);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (offset <= NR_STAB_CACHE_ENTRIES) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < offset; i++) {
|
|
|
|
ste = stab + __get_cpu_var(stab_cache[i]);
|
|
|
|
ste->esid_data = 0; /* invalidate entry */
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
unsigned long entry;
|
|
|
|
|
|
|
|
/* Invalidate all entries. */
|
|
|
|
ste = stab;
|
|
|
|
|
|
|
|
/* Never flush the first entry. */
|
|
|
|
ste += 1;
|
|
|
|
for (entry = 1;
|
2005-11-07 08:06:55 +08:00
|
|
|
entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
|
2005-04-17 06:20:36 +08:00
|
|
|
entry++, ste++) {
|
|
|
|
unsigned long ea;
|
|
|
|
ea = ste->esid_data & ESID_MASK;
|
2005-12-04 15:39:15 +08:00
|
|
|
if (!is_kernel_addr(ea)) {
|
2005-04-17 06:20:36 +08:00
|
|
|
ste->esid_data = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
asm volatile("sync; slbia; sync":::"memory");
|
|
|
|
|
|
|
|
__get_cpu_var(stab_cache_ptr) = 0;
|
|
|
|
|
|
|
|
/* Now preload some entries for the new task */
|
|
|
|
if (test_tsk_thread_flag(tsk, TIF_32BIT))
|
|
|
|
unmapped_base = TASK_UNMAPPED_BASE_USER32;
|
|
|
|
else
|
|
|
|
unmapped_base = TASK_UNMAPPED_BASE_USER64;
|
|
|
|
|
|
|
|
__ste_allocate(pc, mm);
|
|
|
|
|
|
|
|
if (GET_ESID(pc) == GET_ESID(stack))
|
|
|
|
return;
|
|
|
|
|
|
|
|
__ste_allocate(stack, mm);
|
|
|
|
|
|
|
|
if ((GET_ESID(pc) == GET_ESID(unmapped_base))
|
|
|
|
|| (GET_ESID(stack) == GET_ESID(unmapped_base)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
__ste_allocate(unmapped_base, mm);
|
|
|
|
|
|
|
|
/* Order update */
|
|
|
|
asm volatile("sync" : : : "memory");
|
|
|
|
}
|
|
|
|
|
2005-07-28 02:44:19 +08:00
|
|
|
/*
|
|
|
|
* Allocate segment tables for secondary CPUs. These must all go in
|
|
|
|
* the first (bolted) segment, so that do_stab_bolted won't get a
|
|
|
|
* recursive segment miss on the segment table itself.
|
|
|
|
*/
|
2007-05-07 13:58:28 +08:00
|
|
|
void __init stabs_alloc(void)
|
2005-07-28 02:44:19 +08:00
|
|
|
{
|
|
|
|
int cpu;
|
|
|
|
|
2011-04-07 03:48:50 +08:00
|
|
|
if (mmu_has_feature(MMU_FTR_SLB))
|
2005-07-28 02:44:19 +08:00
|
|
|
return;
|
|
|
|
|
2006-03-29 06:50:51 +08:00
|
|
|
for_each_possible_cpu(cpu) {
|
2005-07-28 02:44:19 +08:00
|
|
|
unsigned long newstab;
|
|
|
|
|
|
|
|
if (cpu == 0)
|
|
|
|
continue; /* stab for CPU 0 is statically allocated */
|
|
|
|
|
2010-07-12 12:36:09 +08:00
|
|
|
newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
|
2005-11-07 08:06:55 +08:00
|
|
|
1<<SID_SHIFT);
|
2005-12-06 00:24:33 +08:00
|
|
|
newstab = (unsigned long)__va(newstab);
|
2005-07-28 02:44:19 +08:00
|
|
|
|
2005-11-07 08:06:55 +08:00
|
|
|
memset((void *)newstab, 0, HW_PAGE_SIZE);
|
2005-07-28 02:44:19 +08:00
|
|
|
|
|
|
|
paca[cpu].stab_addr = newstab;
|
|
|
|
paca[cpu].stab_real = virt_to_abs(newstab);
|
2009-01-06 22:26:03 +08:00
|
|
|
printk(KERN_INFO "Segment table for CPU %d at 0x%llx "
|
|
|
|
"virtual, 0x%llx absolute\n",
|
2005-11-07 08:06:55 +08:00
|
|
|
cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
|
2005-07-28 02:44:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Build an entry for the base kernel segment and put it into
|
|
|
|
* the segment table or SLB. All other segment table or SLB
|
|
|
|
* entries are faulted in.
|
|
|
|
*/
|
|
|
|
void stab_initialize(unsigned long stab)
|
|
|
|
{
|
2007-10-11 18:37:10 +08:00
|
|
|
unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M);
|
2005-11-10 10:37:51 +08:00
|
|
|
unsigned long stabreal;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-11-07 08:06:55 +08:00
|
|
|
asm volatile("isync; slbia; isync":::"memory");
|
2005-12-06 00:24:33 +08:00
|
|
|
make_ste(stab, GET_ESID(PAGE_OFFSET), vsid);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-11-07 08:06:55 +08:00
|
|
|
/* Order update */
|
|
|
|
asm volatile("sync":::"memory");
|
2005-11-10 10:37:51 +08:00
|
|
|
|
|
|
|
/* Set ASR */
|
|
|
|
stabreal = get_paca()->stab_real | 0x1ul;
|
|
|
|
|
|
|
|
mtspr(SPRN_ASR, stabreal);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|