sh: TLB miss fast-path optimizations.
Handle simple TLB miss faults which can be resolved completely from the page table in assembler. Signed-off-by: Stuart Menefy <stuart.menefy@st.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
This commit is contained in:
parent
9daa0c257d
commit
9b3a53ab76
|
@ -379,6 +379,9 @@ config CPU_HAS_SR_RB
|
|||
See <file:Documentation/sh/register-banks.txt> for further
|
||||
information on SR.RB and register banking in the kernel in general.
|
||||
|
||||
config CPU_HAS_PTEA
|
||||
bool
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Timer support"
|
||||
|
|
|
@ -13,8 +13,10 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cpu/mmu_context.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/cpu/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
! NOTE:
|
||||
! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address
|
||||
|
@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store)
|
|||
|
||||
call_dpf:
|
||||
mov.l 1f, r0
|
||||
mov r5, r8
|
||||
mov.l @r0, r6
|
||||
mov r6, r9
|
||||
mov.l 2f, r0
|
||||
sts pr, r10
|
||||
jsr @r0
|
||||
mov r15, r4
|
||||
!
|
||||
tst r0, r0
|
||||
bf/s 0f
|
||||
lds r10, pr
|
||||
rts
|
||||
nop
|
||||
0: sti
|
||||
mov.l @r0, r6 ! address
|
||||
mov.l 3f, r0
|
||||
mov r9, r6
|
||||
mov r8, r5
|
||||
sti
|
||||
jmp @r0
|
||||
mov r15, r4
|
||||
mov r15, r4 ! regs
|
||||
|
||||
.align 2
|
||||
1: .long MMU_TEA
|
||||
2: .long __do_page_fault
|
||||
3: .long do_page_fault
|
||||
|
||||
.align 2
|
||||
|
@ -344,9 +331,176 @@ general_exception:
|
|||
2: .long ret_from_exception
|
||||
!
|
||||
!
|
||||
|
||||
/* This code makes some assumptions to improve performance.
|
||||
* Make sure they are still true. */
|
||||
/* tlb_miss masks both the pgd index and the pte index with the same
 * (PTRS_PER_PGD-1) << 2 constant, so both tables must have the same
 * number of entries. */
#if PTRS_PER_PGD != PTRS_PER_PTE
|
||||
#error PGD and PTE sizes don't match
|
||||
#endif
|
||||
|
||||
/* gas doesn't flag impossible values for mov #immediate as an error */
|
||||
#if (_PAGE_PRESENT >> 2) > 0x7f
|
||||
#error cannot load PAGE_PRESENT as an immediate
|
||||
#endif
|
||||
#if _PAGE_DIRTY > 0x7f
|
||||
#error cannot load PAGE_DIRTY as an immediate
|
||||
#endif
|
||||
#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED
|
||||
#error cannot derive PAGE_ACCESSED from PAGE_PRESENT
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_CPU_SH4)
|
||||
#define ldmmupteh(r) mov.l 8f, r
|
||||
#else
|
||||
#define ldmmupteh(r) mov #MMU_PTEH, r
|
||||
#endif
|
||||
|
||||
.balign 1024,0,1024
|
||||
tlb_miss:
|
||||
mov.l 1f, k2
|
||||
#ifdef COUNT_EXCEPTIONS
|
||||
! Increment the counts
|
||||
mov.l 9f, k1
|
||||
mov.l @k1, k2
|
||||
add #1, k2
|
||||
mov.l k2, @k1
|
||||
#endif
|
||||
|
||||
! k0 scratch
|
||||
! k1 pgd and pte pointers
|
||||
! k2 faulting address
|
||||
! k3 pgd and pte index masks
|
||||
! k4 shift
|
||||
|
||||
! Load up the pgd entry (k1)
|
||||
|
||||
ldmmupteh(k0) ! 9 LS (latency=2) MMU_PTEH
|
||||
|
||||
mov.w 4f, k3 ! 8 LS (latency=2) (PTRS_PER_PGD-1) << 2
|
||||
mov #-(PGDIR_SHIFT-2), k4 ! 6 EX
|
||||
|
||||
mov.l @(MMU_TEA-MMU_PTEH,k0), k2 ! 18 LS (latency=2)
|
||||
|
||||
mov.l @(MMU_TTB-MMU_PTEH,k0), k1 ! 18 LS (latency=2)
|
||||
|
||||
mov k2, k0 ! 5 MT (latency=0)
|
||||
shld k4, k0 ! 99 EX
|
||||
|
||||
and k3, k0 ! 78 EX
|
||||
|
||||
mov.l @(k0, k1), k1 ! 21 LS (latency=2)
|
||||
mov #-(PAGE_SHIFT-2), k4 ! 6 EX
|
||||
|
||||
! Load up the pte entry (k2)
|
||||
|
||||
mov k2, k0 ! 5 MT (latency=0)
|
||||
shld k4, k0 ! 99 EX
|
||||
|
||||
tst k1, k1 ! 86 MT
|
||||
|
||||
bt 20f ! 110 BR
|
||||
|
||||
and k3, k0 ! 78 EX
|
||||
mov.w 5f, k4 ! 8 LS (latency=2) _PAGE_PRESENT
|
||||
|
||||
mov.l @(k0, k1), k2 ! 21 LS (latency=2)
|
||||
add k0, k1 ! 49 EX
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_PTEA
|
||||
! Test the entry for present and _PAGE_ACCESSED
|
||||
|
||||
mov #-28, k3 ! 6 EX
|
||||
mov k2, k0 ! 5 MT (latency=0)
|
||||
|
||||
tst k4, k2 ! 68 MT
|
||||
shld k3, k0 ! 99 EX
|
||||
|
||||
bt 20f ! 110 BR
|
||||
|
||||
! Set PTEA register
|
||||
! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
|
||||
!
|
||||
! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT
|
||||
|
||||
and #0xe, k0 ! 79 EX
|
||||
|
||||
mov k0, k3 ! 5 MT (latency=0)
|
||||
mov k2, k0 ! 5 MT (latency=0)
|
||||
|
||||
and #1, k0 ! 79 EX
|
||||
|
||||
or k0, k3 ! 82 EX
|
||||
|
||||
ldmmupteh(k0) ! 9 LS (latency=2)
|
||||
shll2 k4 ! 101 EX _PAGE_ACCESSED
|
||||
|
||||
tst k4, k2 ! 68 MT
|
||||
|
||||
mov.l k3, @(MMU_PTEA-MMU_PTEH,k0) ! 27 LS
|
||||
|
||||
mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
|
||||
|
||||
! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
|
||||
#else
|
||||
|
||||
! Test the entry for present and _PAGE_ACCESSED
|
||||
|
||||
mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
|
||||
tst k4, k2 ! 68 MT
|
||||
|
||||
shll2 k4 ! 101 EX _PAGE_ACCESSED
|
||||
ldmmupteh(k0) ! 9 LS (latency=2)
|
||||
|
||||
bt 20f ! 110 BR
|
||||
tst k4, k2 ! 68 MT
|
||||
|
||||
! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
|
||||
|
||||
#endif
|
||||
|
||||
! Set up the entry
|
||||
|
||||
and k2, k3 ! 78 EX
|
||||
bt/s 10f ! 108 BR
|
||||
|
||||
mov.l k3, @(MMU_PTEL-MMU_PTEH,k0) ! 27 LS
|
||||
|
||||
ldtlb ! 128 CO
|
||||
|
||||
! At least one instruction between ldtlb and rte
|
||||
nop ! 119 NOP
|
||||
|
||||
rte ! 126 CO
|
||||
|
||||
nop ! 119 NOP
|
||||
|
||||
|
||||
10: or k4, k2 ! 82 EX
|
||||
|
||||
ldtlb ! 128 CO
|
||||
|
||||
! At least one instruction between ldtlb and rte
|
||||
mov.l k2, @k1 ! 27 LS
|
||||
|
||||
rte ! 126 CO
|
||||
|
||||
! Note we cannot execute the mov here, because it would be executed after
|
||||
! SSR is restored, and so would run in user space.
|
||||
nop ! 119 NOP
|
||||
|
||||
|
||||
.align 5
|
||||
! One cache line if possible...
|
||||
1: .long swapper_pg_dir
|
||||
4: .short (PTRS_PER_PGD-1) << 2
|
||||
5: .short _PAGE_PRESENT
|
||||
7: .long _PAGE_FLAGS_HARDWARE_MASK
|
||||
8: .long MMU_PTEH
|
||||
#ifdef COUNT_EXCEPTIONS
|
||||
9: .long exception_count_miss
|
||||
#endif
|
||||
|
||||
! Either pgd or pte not present
|
||||
20: mov.l 1f, k2
|
||||
mov.l 4f, k3
|
||||
bra handle_exception
|
||||
mov.l @k2, k2
|
||||
|
@ -496,6 +650,15 @@ skip_save:
|
|||
bf interrupt_exception
|
||||
shlr2 r8
|
||||
shlr r8
|
||||
|
||||
#ifdef COUNT_EXCEPTIONS
|
||||
mov.l 5f, r9
|
||||
add r8, r9
|
||||
mov.l @r9, r10
|
||||
add #1, r10
|
||||
mov.l r10, @r9
|
||||
#endif
|
||||
|
||||
mov.l 4f, r9
|
||||
add r8, r9
|
||||
mov.l @r9, r9
|
||||
|
@ -509,6 +672,9 @@ skip_save:
|
|||
2: .long 0x000080f0 ! FD=1, IMASK=15
|
||||
3: .long 0xcfffffff ! RB=0, BL=0
|
||||
4: .long exception_handling_table
|
||||
#ifdef COUNT_EXCEPTIONS
|
||||
5: .long exception_count_table
|
||||
#endif
|
||||
|
||||
interrupt_exception:
|
||||
mov.l 1f, r9
|
||||
|
|
|
@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void)
|
|||
case 0x205:
|
||||
cpu_data->type = CPU_SH7750;
|
||||
cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
|
||||
CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
|
||||
CPU_HAS_PERF_COUNTER;
|
||||
break;
|
||||
case 0x206:
|
||||
cpu_data->type = CPU_SH7750S;
|
||||
cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
|
||||
CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
|
||||
CPU_HAS_PERF_COUNTER;
|
||||
break;
|
||||
case 0x1100:
|
||||
cpu_data->type = CPU_SH7751;
|
||||
cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
|
||||
cpu_data->flags |= CPU_HAS_FPU;
|
||||
break;
|
||||
case 0x2000:
|
||||
cpu_data->type = CPU_SH73180;
|
||||
|
@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void)
|
|||
break;
|
||||
case 0x8000:
|
||||
cpu_data->type = CPU_ST40RA;
|
||||
cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
|
||||
cpu_data->flags |= CPU_HAS_FPU;
|
||||
break;
|
||||
case 0x8100:
|
||||
cpu_data->type = CPU_ST40GX1;
|
||||
cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
|
||||
cpu_data->flags |= CPU_HAS_FPU;
|
||||
break;
|
||||
case 0x700:
|
||||
cpu_data->type = CPU_SH4_501;
|
||||
cpu_data->icache.ways = 2;
|
||||
cpu_data->dcache.ways = 2;
|
||||
cpu_data->flags |= CPU_HAS_PTEA;
|
||||
break;
|
||||
case 0x600:
|
||||
cpu_data->type = CPU_SH4_202;
|
||||
cpu_data->icache.ways = 2;
|
||||
cpu_data->dcache.ways = 2;
|
||||
cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
|
||||
cpu_data->flags |= CPU_HAS_FPU;
|
||||
break;
|
||||
case 0x500 ... 0x501:
|
||||
switch (prr) {
|
||||
|
@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void)
|
|||
cpu_data->icache.ways = 2;
|
||||
cpu_data->dcache.ways = 2;
|
||||
|
||||
cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
|
||||
cpu_data->flags |= CPU_HAS_FPU;
|
||||
|
||||
break;
|
||||
default:
|
||||
|
@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void)
|
|||
cpu_data->dcache.ways = 1;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_PTEA
|
||||
cpu_data->flags |= CPU_HAS_PTEA;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On anything that's not a direct-mapped cache, look to the CVR
|
||||
* for I/D-cache specifics.
|
||||
|
|
|
@ -20,6 +20,7 @@ config CPU_SH4
|
|||
bool
|
||||
select CPU_HAS_INTEVT
|
||||
select CPU_HAS_SR_RB
|
||||
select CPU_HAS_PTEA if !CPU_SUBTYPE_ST40
|
||||
|
||||
config CPU_SH4A
|
||||
bool
|
||||
|
|
|
@ -223,89 +223,3 @@ do_sigbus:
|
|||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SH_STORE_QUEUES
|
||||
/*
|
||||
* This is a special case for the SH-4 store queues, as pages for this
|
||||
* space still need to be faulted in before it's possible to flush the
|
||||
* store queue cache for writeout to the remapped region.
|
||||
*/
|
||||
#define P3_ADDR_MAX (P4SEG_STORE_QUE + 0x04000000)
|
||||
#else
|
||||
#define P3_ADDR_MAX P4SEG
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Called with interrupts disabled.
|
||||
*/
|
||||
asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
|
||||
unsigned long writeaccess,
|
||||
unsigned long address)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
pte_t entry;
|
||||
struct mm_struct *mm = current->mm;
|
||||
spinlock_t *ptl;
|
||||
int ret = 1;
|
||||
|
||||
#ifdef CONFIG_SH_KGDB
|
||||
if (kgdb_nofault && kgdb_bus_err_hook)
|
||||
kgdb_bus_err_hook();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We don't take page faults for P1, P2, and parts of P4, these
|
||||
* are always mapped, whether it be due to legacy behaviour in
|
||||
* 29-bit mode, or due to PMB configuration in 32-bit mode.
|
||||
*/
|
||||
if (address >= P3SEG && address < P3_ADDR_MAX) {
|
||||
pgd = pgd_offset_k(address);
|
||||
mm = NULL;
|
||||
} else {
|
||||
if (unlikely(address >= TASK_SIZE || !mm))
|
||||
return 1;
|
||||
|
||||
pgd = pgd_offset(mm, address);
|
||||
}
|
||||
|
||||
pud = pud_offset(pgd, address);
|
||||
if (pud_none_or_clear_bad(pud))
|
||||
return 1;
|
||||
pmd = pmd_offset(pud, address);
|
||||
if (pmd_none_or_clear_bad(pmd))
|
||||
return 1;
|
||||
|
||||
if (mm)
|
||||
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
|
||||
else
|
||||
pte = pte_offset_kernel(pmd, address);
|
||||
|
||||
entry = *pte;
|
||||
if (unlikely(pte_none(entry) || pte_not_present(entry)))
|
||||
goto unlock;
|
||||
if (unlikely(writeaccess && !pte_write(entry)))
|
||||
goto unlock;
|
||||
|
||||
if (writeaccess)
|
||||
entry = pte_mkdirty(entry);
|
||||
entry = pte_mkyoung(entry);
|
||||
|
||||
#ifdef CONFIG_CPU_SH4
|
||||
/*
|
||||
* ITLB is not affected by "ldtlb" instruction.
|
||||
* So, we need to flush the entry by ourselves.
|
||||
*/
|
||||
__flush_tlb_page(get_asid(), address & PAGE_MASK);
|
||||
#endif
|
||||
|
||||
set_pte(pte, entry);
|
||||
update_mmu_cache(NULL, address, entry);
|
||||
ret = 0;
|
||||
unlock:
|
||||
if (mm)
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -43,12 +43,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|||
/* PGD bits */
|
||||
#define PGDIR_SHIFT (PTE_SHIFT + PTE_BITS)
|
||||
#define PGDIR_BITS (32 - PGDIR_SHIFT)
|
||||
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
||||
#define PGDIR_SIZE (1 << PGDIR_SHIFT)
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
||||
|
||||
/* Entries per level */
|
||||
#define PTRS_PER_PTE (1UL << PTE_BITS)
|
||||
#define PTRS_PER_PGD (1UL << PGDIR_BITS)
|
||||
#define PTRS_PER_PTE (1 << PTE_BITS)
|
||||
#define PTRS_PER_PGD (1 << PGDIR_BITS)
|
||||
|
||||
#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
|
||||
#define FIRST_USER_ADDRESS 0
|
||||
|
|
Loading…
Reference in New Issue