From e8a75963a4b9433dca55286e222f4dd1cc1ca76c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 28 Aug 2015 08:39:57 +0530 Subject: [PATCH 01/42] ARC: mm: switch pgtable_to to pte_t * ARC is the only arch with unsigned long type (vs. struct page *). Historically this was done to avoid the page_address() calls in various arch hooks which need to get the virtual/logical address of the table. Some arches alternately define it as pte_t *, and is as efficient as unsigned long (generated code doesn't change) Suggested-by: Kirill A. Shutemov Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 4 ++-- arch/arc/include/asm/pgalloc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 9c8aa41e45c2..2994cac1069e 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -43,7 +43,6 @@ typedef struct { typedef struct { unsigned long pgprot; } pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd) @@ -60,7 +59,6 @@ typedef unsigned long pgtable_t; typedef unsigned long pte_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) (x) #define pgd_val(x) (x) @@ -71,6 +69,8 @@ typedef unsigned long pgtable_t; #endif +typedef pte_t * pgtable_t; + #define ARCH_PFN_OFFSET (CONFIG_LINUX_LINK_BASE >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 81208bfd9dcb..9149b5ca26d7 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * 4); @@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) { pgtable_page_dtor(virt_to_page(ptep)); - free_pages(ptep, __get_order_pte()); + free_pages((unsigned long)ptep, __get_order_pte()); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #define check_pgt_cache() do { } while (0) -#define pmd_pgtable(pmd) pmd_page_vaddr(pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) #endif /* _ASM_ARC_PGALLOC_H */ From 129cbed54a8b3f80f0eaf49acb14fe835587f6f3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Dec 2013 12:05:05 +0530 Subject: [PATCH 02/42] ARC: mm: pte flags comsetic cleanups, comments No semantical changes Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 37 +++++++++++++++------------------- arch/arc/mm/tlbex.S | 2 +- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1281718802f7..481359fe56ae 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -60,7 +60,7 @@ #define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -71,7 +71,7 @@ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ #if (CONFIG_ARC_MMU_VER >= 4) #define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ @@ -92,21 +92,16 @@ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) -#ifdef CONFIG_ARC_CACHE_PAGES -#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE -#else -#define _PAGE_DEF_CACHEABLE (0) +#ifndef CONFIG_ARC_CACHE_PAGES +#undef _PAGE_CACHEABLE +#define _PAGE_CACHEABLE 0 #endif -/* Helper for every "user" page - * -kernel can R/W/X - * -by default cached, unless config otherwise - * -present in memory - */ -#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE) +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) @@ -122,7 +117,7 @@ * user vaddr space - visible in all addr spaces, but kernel mode only * Thus Global, all-kernel-access, no-user-access, cached */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE) +#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) /* ioremap */ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) @@ -191,16 +186,16 @@ /* Optimal Sizing of Pg Tbl - based on MMU page size */ #if defined(CONFIG_ARC_PAGE_SIZE_8K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 11:8:13 */ #elif defined(CONFIG_ARC_PAGE_SIZE_16K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 10:8:14 */ #elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define BITS_FOR_PTE 9 +#define BITS_FOR_PTE 9 /* 11:9:12 */ #endif #define BITS_FOR_PGD (32 - BITS_FOR_PTE - BITS_IN_PAGE) -#define PGDIR_SHIFT (BITS_FOR_PTE + BITS_IN_PAGE) +#define PGDIR_SHIFT (32 - BITS_FOR_PGD) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -295,7 +290,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Zoo of pte_xxx function */ #define pte_read(pte) (pte_val(pte) & _PAGE_READ) #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) #define pte_special(pte) (0) @@ -304,8 +299,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_MODIFIED)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_MODIFIED)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f6f4c3cb505d..b8b014c6904d 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -365,7 +365,7 @@ ENTRY(EV_TLBMissD) lr r3, [ecr] or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB From 24830fc782a3a740209d39cb27abbf5a9763f61f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 16 Feb 2015 19:01:29 +0530 Subject: [PATCH 03/42] ARC: mm: Introduce PTE_SPECIAL Needed for THP, but will also come in handy for fast GUP later Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 481359fe56ae..431a83329324 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -61,6 +61,7 @@ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ #define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<7) #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -72,6 +73,7 @@ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ #define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<6) #if (CONFIG_ARC_MMU_VER >= 4) #define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ @@ -292,7 +294,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (0) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -305,8 +307,9 @@ PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +#define __HAVE_ARCH_PTE_SPECIAL static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { From 55ad769fde922982533d538bdef37c90a0d82e90 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 27 Aug 2015 14:02:33 +0530 Subject: [PATCH 04/42] Documentation/features/vm: pte_special now supported by ARC Signed-off-by: Vineet Gupta --- Documentation/features/vm/pte_special/arch-support.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index aaaa21db6226..3de5434c857c 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -7,7 +7,7 @@ | arch |status| ----------------------- | alpha: | TODO | - | arc: | TODO | + | arc: | ok | | arm: | ok | | arm64: | ok | | avr32: | TODO | From fe6c1b8611aa3a79a937a5e3b85a16576b6ad159 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 8 Jul 2014 18:43:47 +0530 Subject: [PATCH 05/42] ARCv2: mm: THP support MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP support. Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a new bit "SZ" in TLB page desciptor to distinguish between them. Super Page size is configurable in hardware (4K to 16M), but fixed once RTL builds. The exact THP size a Linx configuration will support is a function of: - MMU page size (typical 8K, RTL fixed) - software page walker address split between PGD:PTE:PFN (typical 11:8:13, but can be changed with 1 line) So for above default, THP size supported is 8K * 256 = 2M Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime reduces to 1 level (as PTE is folded into PGD and canonically referred to as PMD). Thus thp PMD accessors are implemented in terms of PTE (just like sparc) Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++ arch/arc/include/asm/hugepage.h | 77 +++++++++++++++++++++++++++++++ arch/arc/include/asm/page.h | 1 + arch/arc/include/asm/pgtable.h | 16 ++++++- arch/arc/mm/tlb.c | 81 +++++++++++++++++++++++++++++++++ arch/arc/mm/tlbex.S | 19 ++++++-- 6 files changed, 192 insertions(+), 6 deletions(-) create mode 100644 arch/arc/include/asm/hugepage.h diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 78c0621d5819..5912006391ed 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -76,6 +76,10 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARC_MMU_V4 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h new file mode 100644 index 000000000000..1d0700c32b82 --- /dev/null +++ b/arch/arc/include/asm/hugepage.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#ifndef _ASM_ARC_HUGEPAGE_H +#define _ASM_ARC_HUGEPAGE_H + +#include +#include + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_special(pmd) pte_special(pmd_pte(pmd)) + +#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) + +#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) +#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd)) + +#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + /* + * open-coded pte_modify() with additional retaining of HW_SZ bit + * so that pmd_trans_huge() remains true for this PMD + */ + return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot)); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +#define has_transparent_hugepage() 1 + +/* Generic variants assume pgtable_t is struct page *, hence need for these */ +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#endif diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 2994cac1069e..37706837ef75 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -64,6 +64,7 @@ typedef unsigned long pgprot_t; #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) +#define __pgd(x) (x) #define __pgprot(x) (x) #define pte_pgprot(x) (x) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 431a83329324..336267f2e9d9 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -83,11 +83,13 @@ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ #if (CONFIG_ARC_MMU_VER >= 4) -#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */ +#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ #endif #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ + +#define _PAGE_UNUSED_BIT (1<<12) #endif /* vmalloc permissions */ @@ -99,6 +101,10 @@ #define _PAGE_CACHEABLE 0 #endif +#ifndef _PAGE_HW_SZ +#define _PAGE_HW_SZ 0 +#endif + /* Defaults for every user page */ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) @@ -125,7 +131,7 @@ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) /* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) @@ -299,6 +305,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); @@ -308,6 +315,7 @@ PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); #define __HAVE_ARCH_PTE_SPECIAL @@ -381,6 +389,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * remap a physical page `pfn' of size `size' with page protection `prot' * into virtual address `from' */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + #include /* to cope with aliasing VIPT cache */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 2c7ce8bb7475..eb1bdc40e24f 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -256,6 +256,18 @@ noinline void local_flush_tlb_all(void) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + } + utlb_invalidate(); local_irq_restore(flags); @@ -580,6 +592,75 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, } } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page desciptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. + * + * The exact THP size a Linx configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). + * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} + +#endif + /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index b8b014c6904d..552594897655 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -205,10 +205,18 @@ ex_saved_reg1: #endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr - and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags - ; contains Ptr to Page Table - bz.d do_slow_path_pf ; if no Page Table, do page fault + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr @@ -219,6 +227,9 @@ ex_saved_reg1: lsr r0, r2, (PAGE_SHIFT - 2) and r0, r0, ( (PTRS_PER_PTE - 1) << 2) ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr + +2: + #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f From 6ce187985f31c441f7fc10a4d265182d05bc7ad3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Mar 2015 19:48:03 +0530 Subject: [PATCH 06/42] ARCv2: mm: THP: boot validation/reporting Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index eb1bdc40e24f..91905b1c3d72 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -736,7 +736,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) if (p_mmu->s_pg_sz_m) scnprintf(super_pg, 64, "%dM Super Page%s, ", - p_mmu->s_pg_sz_m, " (not used)"); + p_mmu->s_pg_sz_m, + IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? "" : " (not used)"); n += scnprintf(buf + n, len - n, "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", @@ -771,6 +772,11 @@ void arc_mmu_init(void) if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); From 443a6312832fe7362edffed960204f750b230f46 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 27 Aug 2015 14:03:20 +0530 Subject: [PATCH 07/42] Documentation/features/vm: THP now supported by ARC Signed-off-by: Vineet Gupta --- Documentation/features/vm/THP/arch-support.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt index df384e3e845f..523f8307b9cd 100644 --- a/Documentation/features/vm/THP/arch-support.txt +++ b/Documentation/features/vm/THP/arch-support.txt @@ -7,7 +7,7 @@ | arch |status| ----------------------- | alpha: | TODO | - | arc: | .. | + | arc: | ok | | arm: | ok | | arm64: | ok | | avr32: | .. | From 52585bcc2505d6f888d4ac68ca6c55f6d1ad736a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 9 Jul 2015 17:19:30 +0530 Subject: [PATCH 08/42] mm: group pte related helpers together This reduces/simplifies the diff for the next patch which moves THP specific code. No semantical changes ! Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/1442918096-17454-9-git-send-email-vgupta@synopsys.com Signed-off-by: Vineet Gupta --- mm/pgtable-generic.c | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 6b674e00153c..48851894e699 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -57,6 +57,31 @@ int ptep_set_access_flags(struct vm_area_struct *vma, } #endif +#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + int young; + young = ptep_test_and_clear_young(vma, address, ptep); + if (young) + flush_tlb_page(vma, address); + return young; +} +#endif + +#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) +{ + struct mm_struct *mm = (vma)->vm_mm; + pte_t pte; + pte = ptep_get_and_clear(mm, address, ptep); + if (pte_accessible(mm, pte)) + flush_tlb_page(vma, address); + return pte; +} +#endif + #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, @@ -77,18 +102,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -int ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) -{ - int young; - young = ptep_test_and_clear_young(vma, address, ptep); - if (young) - flush_tlb_page(vma, address); - return young; -} -#endif - #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -106,19 +119,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH -pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep) -{ - struct mm_struct *mm = (vma)->vm_mm; - pte_t pte; - pte = ptep_get_and_clear(mm, address, ptep); - if (pte_accessible(mm, pte)) - flush_tlb_page(vma, address); - return pte; -} -#endif - #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH #ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, From bd5e88ad72b26ebf7ecb231bc22ceecd6cbdb951 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 9 Jul 2015 17:22:44 +0530 Subject: [PATCH 09/42] mm,thp: reduce ifdef'ery for THP in generic code - pgtable-generic.c: Fold individual #ifdef for each helper into a top level #ifdef. Makes code more readable - Converted the stub helpers for !THP to BUILD_BUG() vs. runtime BUG() Acked-by: Kirill A. Shutemov Link: http://lkml.kernel.org/r/20151009133450.GA8597@node Signed-off-by: Vineet Gupta --- include/asm-generic/pgtable.h | 37 ++++++++++++++++++++++++++++------- mm/pgtable-generic.c | 24 +++-------------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 29c57b2cb344..3eabbbbfd578 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -30,9 +30,19 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); +#else +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + BUILD_BUG(); + return 0; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG @@ -64,12 +74,12 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd)); return r; } -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +#else static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - BUG(); + BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -81,8 +91,21 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#else +/* + * Despite relevant to THP only, this API is called from generic rmap code + * under PageTransHuge(), hence needs a dummy implementation for !THP + */ +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + BUILD_BUG(); + return 0; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -175,11 +198,11 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, pmd_t old_pmd = *pmdp; set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd)); } -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +#else static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - BUG(); + BUILD_BUG(); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -248,7 +271,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { - BUG(); + BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 48851894e699..c9c59bb75a17 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -82,12 +82,13 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, } #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { @@ -95,10 +96,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ - BUG(); - return 0; -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ } #endif @@ -107,11 +104,7 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON(address & ~HPAGE_PMD_MASK); -#else - BUG(); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); @@ -120,7 +113,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH -#ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -131,11 +123,9 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -145,11 +135,9 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, /* tlb flush only to serialize against gup-fast */ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { @@ -162,11 +150,9 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); pmd_huge_pte(mm, pmdp) = pgtable; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* no "address" argument so destroys page coloring of some arch */ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { @@ -185,11 +171,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) } return pgtable; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -197,11 +181,9 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef pmdp_collapse_flush -#ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -217,5 +199,5 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ From 12ebc1581ad114543ae822aa3a12f76072e2f902 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 20 Feb 2015 10:36:28 +0530 Subject: [PATCH 10/42] mm,thp: introduce flush_pmd_tlb_range ARCHes with special requirements for evicting THP backing TLB entries can implement this. Otherwise also, it can help optimize TLB flush in THP regime. stock flush_tlb_range() typically has optimization to nuke the entire TLB if flush span is greater than a certain threshhold, which will likely be true for a single huge page. Thus a single thp flush will invalidate the entrire TLB which is not desirable. e.g. see arch/arc: flush_pmd_tlb_range Acked-by: Kirill A. Shutemov Link: http://lkml.kernel.org/r/20151009100816.GC7873@node Signed-off-by: Vineet Gupta --- mm/huge_memory.c | 2 +- mm/pgtable-generic.c | 26 ++++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4b06b8db9df2..e25eb3d2081a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1880,7 +1880,7 @@ static int __split_huge_page_map(struct page *page, * here). But it is generally safer to never allow * small and huge TLB entries for the same virtual * address to be loaded simultaneously. So instead of - * doing "pmd_populate(); flush_tlb_range();" we first + * doing "pmd_populate(); flush_pmd_tlb_range();" we first * mark the current pmd notpresent (atomically because * here the pmd_trans_huge and pmd_trans_splitting * must remain set at all times on the pmd until the diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index c9c59bb75a17..7d3db0247983 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -84,6 +84,20 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + +/* + * ARCHes with special requirements for evicting THP backing TLB entries can + * implement this. Otherwise also, it can help optimize normal TLB flush in + * THP regime. stock flush_tlb_range() typically has optimization to nuke the + * entire TLB TLB if flush span is greater than a threshhold, which will + * likely be true for a single huge page. Thus a single thp flush will + * invalidate the entire TLB which is not desitable. + * e.g. see arch/arc: flush_pmd_tlb_range + */ +#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#endif + #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, @@ -93,7 +107,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { set_pmd_at(vma->vm_mm, address, pmdp, entry); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } @@ -107,7 +121,7 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } #endif @@ -120,7 +134,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(!pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #endif @@ -133,7 +147,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); set_pmd_at(vma->vm_mm, address, pmdp, pmd); /* tlb flush only to serialize against gup-fast */ - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } #endif @@ -179,7 +193,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, { pmd_t entry = *pmdp; set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } #endif @@ -196,7 +210,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #endif From 722fe8fd365a08bd53e9dd105009ab810107b02d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 27 Feb 2015 19:36:35 +0530 Subject: [PATCH 11/42] ARCv2: mm: THP: Implement flush_pmd_tlb_range() optimization Implement the TLB flush routine to evict a sepcific Super TLB entry, vs. moving to a new ASID on every such flush. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/hugepage.h | 4 ++++ arch/arc/mm/tlb.c | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 1d0700c32b82..c5094de86403 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -74,4 +74,8 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, #define __HAVE_ARCH_PGTABLE_WITHDRAW extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 91905b1c3d72..005090e425f4 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -659,6 +659,26 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + #endif /* Read the Cache Build Confuration Registers, Decode them and save into From c7119d56d2755fc2770b0e2c1c4385e10f4c9161 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 15 Oct 2015 08:04:45 +0530 Subject: [PATCH 12/42] ARCv2: mm: THP: flush_pmd_tlb_range make SMP safe Signed-off-by: Vineet Gupta --- arch/arc/include/asm/tlbflush.h | 5 +++++ arch/arc/mm/tlb.c | 27 +++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h index 71c7b2e4b874..1fe9c8c80280 100644 --- a/arch/arc/include/asm/tlbflush.h +++ b/arch/arc/include/asm/tlbflush.h @@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) @@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_pmd_tlb_range(vma, s, e) local_flush_pmd_tlb_range(vma, s, e) #else extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); + #endif /* CONFIG_SMP */ #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 005090e425f4..29b587835974 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -421,6 +421,15 @@ static inline void ipi_flush_tlb_range(void *arg) local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + static inline void ipi_flush_tlb_kernel_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; @@ -461,6 +470,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + void flush_tlb_kernel_range(unsigned long start, unsigned long end) { struct tlb_args ta = { @@ -659,8 +682,8 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } -void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { unsigned int cpu; unsigned long flags; From 9dbd3d9bfd56707f9b1ccc301506e2fac0e95795 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 5 Sep 2015 22:47:30 +0530 Subject: [PATCH 13/42] ARC: [arcompact] don't check for hard isr calling local_irq_enable() Historically this was done by ARC IDE driver, which is long gone. IRQ core is pretty robust now and already checks if IRQs are enabled in hard ISRs. Thus no point in checking this in arch code, for every call of irq enabled. Further if some driver does do that - let it bring down the system so we notice/fix this sooner than covering up for sucker This makes local_irq_enable() - for L1 only case atleast simple enough so we can inline it. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-compact.h | 14 ++++- arch/arc/kernel/intc-compact.c | 73 ++----------------------- 2 files changed, 18 insertions(+), 69 deletions(-) diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index aa805575c320..a9490841c801 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -91,7 +91,19 @@ static inline void arch_local_irq_restore(unsigned long flags) /* * Unconditionally Enable IRQs */ -extern void arch_local_irq_enable(void); +static inline void arch_local_irq_enable(void) +{ + unsigned long temp; + + __asm__ __volatile__( + " lr %0, [status32] \n" + " or %0, %0, %1 \n" + " flag %0 \n" + : "=&r"(temp) + : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) + : "cc", "memory"); +} + /* * Unconditionally Disable IRQs diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 039fac30b5c1..a1669cf2a277 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -148,78 +148,15 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); void arch_local_irq_enable(void) { - unsigned long flags = arch_local_save_flags(); - /* Allow both L1 and L2 at the onset */ - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - - /* Called from hard ISR (between irq_enter and irq_exit) */ - if (in_irq()) { - - /* If in L2 ISR, don't re-enable any further IRQs as this can - * cause IRQ priorities to get upside down. e.g. it could allow - * L1 be taken while in L2 hard ISR which is wrong not only in - * theory, it can also cause the dreaded L1-L2-L1 scenario - */ - if (flags & STATUS_A2_MASK) - flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); - - /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ - else if (flags & STATUS_A1_MASK) - flags &= ~(STATUS_E1_MASK); - } - - /* called from soft IRQ, ideally we want to re-enable all levels */ - - else if (in_softirq()) { - - /* However if this is case of L1 interrupted by L2, - * re-enabling both may cause whaco L1-L2-L1 scenario - * because ARC700 allows level 1 to interrupt an active L2 ISR - * Thus we disable both - * However some code, executing in soft ISR wants some IRQs - * to be enabled so we re-enable L2 only - * - * How do we determine L1 intr by L2 - * -A2 is set (means in L2 ISR) - * -E1 is set in this ISR's pt_regs->status32 which is - * saved copy of status32_l2 when l2 ISR happened - */ - struct pt_regs *pt = get_irq_regs(); - - if ((flags & STATUS_A2_MASK) && pt && - (pt->status32 & STATUS_A1_MASK)) { - /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ - flags &= ~(STATUS_E1_MASK); - } - } + if (flags & STATUS_A2_MASK) + flags |= STATUS_E2_MASK; + else if (flags & STATUS_A1_MASK) + flags |= STATUS_E1_MASK; arch_local_irq_restore(flags); } -#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ - -/* - * Simpler version for only 1 level of interrupt - * Here we only Worry about Level 1 Bits - */ -void arch_local_irq_enable(void) -{ - unsigned long flags; - - /* - * ARC IDE Drivers tries to re-enable interrupts from hard-isr - * context which is simply wrong - */ - if (in_irq()) { - WARN_ONCE(1, "IRQ enabled from hard-isr"); - return; - } - - flags = arch_local_save_flags(); - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - arch_local_irq_restore(flags); -} -#endif EXPORT_SYMBOL(arch_local_irq_enable); +#endif From 55a2ae775ab4fe7aefa736e0fae6b8d4bd8aaab5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 5 Sep 2015 23:08:31 +0530 Subject: [PATCH 14/42] ARC: [arcompact] entry.S: Improve early return from exception The requirement is to - Reenable Exceptions (AE cleared) - Reenable Interrupts (E1/E2 set) We need to do wiggle these bits into ERSTATUS and call RTIE. Prev version used the pre-exception STATUS32 as starting point for what goes into ERSTATUS. This required explicit fixups of U/DE/L bits. Instead, use the current (in-exception) STATUS32 as starting point. Being in exception handler U/DE/L can be safely assumed to be correct. Only AE/E1/E2 need to be fixed. So the new implementation is slightly better -Avoids read form memory -Is 4 bytes smaller for the typical 1 level of intr configuration -Depicts the semantics more clearly Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 13 ++++++------- arch/arc/include/asm/irqflags-compact.h | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 415443c2a8c4..1aff3be91075 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -110,13 +110,12 @@ .macro FAKE_RET_FROM_EXCPN - ld r9, [sp, PT_status32] - bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) - bset r9, r9, STATUS_L_BIT - sr r9, [erstatus] - mov r9, 55f - sr r9, [eret] - + lr r9, [status32] + bclr r9, r9, STATUS_AE_BIT + or r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] rtie 55: .endm diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index a9490841c801..d8c608174617 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -23,11 +23,13 @@ #define STATUS_E2_BIT 2 /* Int 2 enable */ #define STATUS_A1_BIT 3 /* Int 1 active */ #define STATUS_A2_BIT 4 /* Int 2 active */ +#define STATUS_AE_BIT 5 /* Exception active */ #define STATUS_E1_MASK (1< Date: Sun, 6 Sep 2015 19:11:12 +0530 Subject: [PATCH 15/42] ARC: [arcompact] entry.S: Document preemption games for L2 intr Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry-compact.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 15d457b4403a..d9087a1236eb 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -175,12 +175,25 @@ ENTRY(handle_interrupt_level2) ;------------------------------------------------------ ; if L2 IRQ interrupted a L1 ISR, disable preemption + ; + ; This is to avoid a potential L1-L2-L1 scenario + ; -L1 IRQ taken + ; -L2 interrupts L1 (before L1 ISR could run) + ; -preemption off IRQ, user task in syscall picked to run + ; -RTIE to userspace + ; Returns from L2 context fine + ; But both L1 and L2 re-enabled, so another L1 can be taken + ; while prev L1 is still unserviced + ; ;------------------------------------------------------ + ; L2 interrupting L1 implies both L2 and L1 active + ; However both A2 and A1 are NOT set in STATUS32, thus + ; need to check STATUS32_L2 to determine if L1 was active + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - ; A1 is set in status32_l2 ; bump thread_info->preempt_count (Disable preemption) GET_CURR_THR_INFO_FROM_SP r10 ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] From 9fabcc636bf57dcb9c6fc5b1f34861c548944fd4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 8 Oct 2015 17:52:27 +0530 Subject: [PATCH 16/42] ARC: [arcompact] entry.S: Elide extra check/branch in exception ret path This is done by improving the laddering logic ! Before: if Exception goto excep_or_pure_k_ret if !Interrupt(L2) goto l1_chk else INTERRUPT_EPILOGUE 2 l1_chk: if !Interrupt(L1) (i.e. pure kernel mode) goto excep_or_pure_k_ret else INTERRUPT_EPILOGUE 1 excep_or_pure_k_ret: EXCEPTION_EPILOGUE Now: if !Interrupt(L1 or L2) (i.e. exception or pure kernel mode) goto excep_or_pure_k_ret ; guaranteed to be an interrupt if !Interrupt(L2) goto l1_ret else INTERRUPT_EPILOGUE 2 ; by virtue of above, no need to chk for L1 active l1_ret: INTERRUPT_EPILOGUE 1 excep_or_pure_k_ret: EXCEPTION_EPILOGUE Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry-compact.S | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index d9087a1236eb..5221f194602b 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -333,11 +333,10 @@ END(call_do_page_fault) ; Note that we use realtime STATUS32 (not pt_regs->status32) to ; decide that. - ; if Returning from Exception - btst r10, STATUS_AE_BIT - bnz .Lexcep_ret + and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) + bz .Lexcep_or_pure_K_ret - ; Not Exception so maybe Interrupts (Level 1 or 2) + ; Returning from Interrupts (Level 1 or 2) #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS @@ -378,8 +377,7 @@ END(call_do_page_fault) st r9, [r10, THREAD_INFO_PREEMPT_COUNT] 149: - ;return from level 2 - INTERRUPT_EPILOGUE 2 + INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt debug_marker_l2: rtie @@ -387,15 +385,11 @@ not_level2_interrupt: #endif - bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret - - ;return from level 1 - INTERRUPT_EPILOGUE 1 + INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt debug_marker_l1: rtie -.Lexcep_ret: -.Lpure_k_mode_ret: +.Lexcep_or_pure_K_ret: ;this case is for syscalls or Exceptions or pure kernel mode From 5c35ee642a1d1341b225808b53fc69df2245b87e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 29 Sep 2015 16:05:48 +0530 Subject: [PATCH 17/42] ARC: make write_aux_reg safer against macro substitution It was generating warnings when called as write_aux_reg(x, paddr >> 32) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index d8023bc8d1ad..431e82893fc8 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -120,7 +120,7 @@ /* gcc builtin sr needs reg param to be long immediate */ #define write_aux_reg(reg_immed, val) \ - __builtin_arc_sr((unsigned int)val, reg_immed) + __builtin_arc_sr((unsigned int)(val), reg_immed) #else From c583ee4fb013bcf3501b9f10c252ea44cf7c657a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 29 Sep 2015 16:01:13 +0530 Subject: [PATCH 18/42] ARC: mm: MMU v1..v3 only selectable for ARCompact ISA based cores Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5912006391ed..f50ff986ed60 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -282,6 +282,8 @@ choice default ARC_MMU_V2 if ARC_CPU_750D default ARC_MMU_V4 if ARC_CPU_HS +if ISA_ARCOMPACT + config ARC_MMU_V1 bool "MMU v1" help @@ -301,6 +303,8 @@ config ARC_MMU_V3 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) +endif + config ARC_MMU_V4 bool "MMU v4" depends on ISA_ARCV2 From b598e17f6a7a3a9bb6e0953ef586ee3697b59fce Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 2 Oct 2015 12:25:35 +0530 Subject: [PATCH 19/42] ARC: mm: compute TLB size as needed from ways * sets This frees up some bits to hold more high level info such as PAE being present, w/o increasing the size of already bloated cpuinfo struct Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 4 ++-- arch/arc/mm/tlb.c | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 431e82893fc8..dc9e65a8d722 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -327,8 +327,8 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6; - unsigned int num_tlb:16, sets:12, ways:4; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:12; + unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; }; struct cpuinfo_arc_cache { diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 29b587835974..25699db016b3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -240,9 +240,10 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) noinline void local_flush_tlb_all(void) { + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned long flags; unsigned int entry; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + int num_tlb = mmu->sets * mmu->ways; local_irq_save(flags); @@ -250,7 +251,7 @@ noinline void local_flush_tlb_all(void) write_aux_reg(ARC_REG_TLBPD1, 0); write_aux_reg(ARC_REG_TLBPD0, 0); - for (entry = 0; entry < mmu->num_tlb; entry++) { + for (entry = 0; entry < num_tlb; entry++) { /* write this entry to the TLB */ write_aux_reg(ARC_REG_TLBINDEX, entry); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); @@ -767,8 +768,6 @@ void read_decode_mmu_bcr(void) mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; } - - mmu->num_tlb = mmu->sets * mmu->ways; } char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) @@ -785,7 +784,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, - p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); From 964cf28f9d10f4e5229e4365258c292bc5c856b2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 2 Oct 2015 19:20:27 +0530 Subject: [PATCH 20/42] ARC: boot log: move helper macros to header for reuse Signed-off-by: Vineet Gupta --- arch/arc/include/asm/setup.h | 7 +++++++ arch/arc/kernel/mcip.c | 3 +-- arch/arc/kernel/setup.c | 4 ---- arch/arc/mm/cache.c | 5 ++--- arch/arc/mm/tlb.c | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 6e3ef5ba4f74..307846691be6 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -33,4 +33,11 @@ extern int root_mountflags, end_mem; void setup_processor(void); void __init setup_arch_memory(void); +/* Helpers used in arc_*_mumbojumbo routines */ +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_DISABLED_RUN(v) ((v) ? "" : "(disabled) ") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) + #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 4ffd1855f1bd..e48a1331c588 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -12,6 +12,7 @@ #include #include #include +#include static char smp_cpuinfo_buf[128]; static int idu_detected; @@ -122,8 +123,6 @@ struct plat_smp_ops plat_smp_ops = { void mcip_init_early_smp(void) { -#define IS_AVAIL1(var, str) ((var) ? str : "") - struct mcip_bcr { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad3:8, diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index cabde9dc0696..68d3e181a82f 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -160,10 +160,6 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -#define IS_AVAIL1(v, s) ((v) ? s : "") -#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") -#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) -#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0d1a6e96839f..ae3b772ecc4d 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -37,7 +37,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) int n = 0; struct cpuinfo_arc_cache *p; -#define IS_USED_RUN(v) ((v) ? "" : "(disabled) ") #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ @@ -47,7 +46,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) (p)->sz_k, (p)->assoc, (p)->line_len, \ (p)->vipt ? "VIPT" : "PIPT", \ (p)->alias ? " aliasing" : "", \ - IS_ENABLED(cfg) ? "" : " (not used)"); + IS_USED_CFG(cfg)); PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); @@ -63,7 +62,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) if (ioc_exists) n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_USED_RUN(ioc_enable)); + IS_DISABLED_RUN(ioc_enable)); return buf; } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 25699db016b3..a69f2078a96d 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -779,7 +779,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) if (p_mmu->s_pg_sz_m) scnprintf(super_pg, 64, "%dM Super Page%s, ", p_mmu->s_pg_sz_m, - IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? "" : " (not used)"); + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", From d0890ea5b68f63d7b8641455dc5534886fee2fa1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 2 Oct 2015 19:24:20 +0530 Subject: [PATCH 21/42] ARC: boot log: decode more mmu config items Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/mm/tlb.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index dc9e65a8d722..7fac7d85ed6a 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -327,7 +327,7 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:12; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; }; diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index a69f2078a96d..2a30c91f7977 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -723,10 +723,10 @@ void read_decode_mmu_bcr(void) struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, u_itlb:4, u_dtlb:4; #else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, ways:4, ver:8; #endif } *mmu3; @@ -747,7 +747,7 @@ void read_decode_mmu_bcr(void) if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(PAGE_SIZE); + mmu->pg_sz_k = TO_KB(0x2000); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; @@ -759,6 +759,7 @@ void read_decode_mmu_bcr(void) mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -767,6 +768,8 @@ void read_decode_mmu_bcr(void) mmu->ways = mmu4->n_ways * 2; mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + mmu->pae = mmu4->pae; } } @@ -782,11 +785,10 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, - p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); + p_mmu->u_dtlb, p_mmu->u_itlb); return buf; } From f33e9c434b8ce833bd3dd39436bd0799c3e1d1c5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 9 Oct 2015 12:16:02 +0530 Subject: [PATCH 22/42] ARC: smp: Move default boot kick/wait code out of MCIP into common code For non halt-on-reset case, all cores start of simultaneously in @stext. Master core0 proceeds with kernel boot, while other spin-wait on @wake_flag being set by master once it is ready. So NO hardware assist is needed for master to "kick" the others. This patch moves this soft implementation out of mcip.c (as there is no hardware assist) into common smp.c Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 18 ----------------- arch/arc/kernel/smp.c | 46 +++++++++++++++++++----------------------- 2 files changed, 21 insertions(+), 43 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index e48a1331c588..e18d36eb0af6 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -97,26 +97,8 @@ static void mcip_ipi_clear(int irq) #endif } -volatile int wake_flag; - -static void mcip_wakeup_cpu(int cpu, unsigned long pc) -{ - BUG_ON(cpu == 0); - wake_flag = cpu; -} - -void arc_platform_smp_wait_to_boot(int cpu) -{ - while (wake_flag != cpu) - ; - - wake_flag = 0; - __asm__ __volatile__("j @first_lines_of_secondary \n"); -} - struct plat_smp_ops plat_smp_ops = { .info = smp_cpuinfo_buf, - .cpu_kick = mcip_wakeup_cpu, .ipi_send = mcip_ipi_send, .ipi_clear = mcip_ipi_clear, }; diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index be13d12420ba..f6175fc5f2bb 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -72,35 +72,29 @@ void __init smp_cpus_done(unsigned int max_cpus) } /* - * After power-up, a non Master CPU needs to wait for Master to kick start it - * - * The default implementation halts - * - * This relies on platform specific support allowing Master to directly set - * this CPU's PC (to be @first_lines_of_secondary() and kick start it. - * - * In lack of such h/w assist, platforms can override this function - * - make this function busy-spin on a token, eventually set by Master - * (from arc_platform_smp_wakeup_cpu()) - * - Once token is available, jump to @first_lines_of_secondary - * (using inline asm). - * - * Alert: can NOT use stack here as it has not been determined/setup for CPU. - * If it turns out to be elaborate, it's better to code it in assembly - * + * Default smp boot helper for Run-on-reset case where all cores start off + * together. Non-masters need to wait for Master to start running. + * This is implemented using a flag in memory, which Non-masters spin-wait on. + * Master sets it to cpu-id of core to "ungate" it. */ -void __weak arc_platform_smp_wait_to_boot(int cpu) +static volatile int wake_flag; + +static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) { - /* - * As a hack for debugging - since debugger will single-step over the - * FLAG insn - wrap the halt itself it in a self loop - */ - __asm__ __volatile__( - "1: \n" - " flag 1 \n" - " b 1b \n"); + BUG_ON(cpu == 0); + wake_flag = cpu; } +void arc_platform_smp_wait_to_boot(int cpu) +{ + while (wake_flag != cpu) + ; + + wake_flag = 0; + __asm__ __volatile__("j @first_lines_of_secondary \n"); +} + + const char *arc_platform_smp_cpuinfo(void) { return plat_smp_ops.info ? : ""; @@ -161,6 +155,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (plat_smp_ops.cpu_kick) plat_smp_ops.cpu_kick(cpu, (unsigned long)first_lines_of_secondary); + else + arc_default_smp_cpu_kick(cpu, (unsigned long)NULL); /* wait for 1 sec after kicking the secondary */ wait_till = jiffies + HZ; From 3971cdc202f638f252e39316d42492ace04cc1b1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 9 Oct 2015 11:26:12 +0530 Subject: [PATCH 23/42] ARC: boot: Support Halt-on-reset and Run-on-reset SMP booting modes For Run-on-reset, non masters need to spin wait. For Halt-on-reset they can jump to entry point directly. Also while at it, made reset vector handler as "the" entry point for kernel including host debugger based boot (which uses the ELF header entry point) Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 10 +++++++ arch/arc/kernel/entry-arcv2.S | 2 +- arch/arc/kernel/entry-compact.S | 10 +++---- arch/arc/kernel/head.S | 47 +++++++++++++++++++-------------- arch/arc/kernel/vmlinux.lds.S | 2 +- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f50ff986ed60..cc938967282b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -194,6 +194,16 @@ config NR_CPUS range 2 4096 default "4" +config ARC_SMP_HALT_ON_RESET + bool "Enable Halt-on-reset boot mode" + default y if ARC_UBOOT_SUPPORT + help + In SMP configuration cores can be configured as Halt-on-reset + or they could all start at same time. For Halt-on-reset, non + masters are parked until Master kicks them so they can start of + at designated entry point. For other case, all jump to common + entry point and spin wait for Master's signal. + endif #SMP menuconfig ARC_CACHE diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 8fa76567e402..445e63a10754 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -24,7 +24,7 @@ .align 4 # Initial 16 slots are Exception Vectors -VECTOR stext ; Restart Vector (jump to entry point) +VECTOR res_service ; Reset Vector VECTOR mem_service ; Mem exception VECTOR instr_service ; Instrn Error VECTOR EV_MachineCheck ; Fatal Machine check diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 5221f194602b..59f52035b4ea 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -86,7 +86,7 @@ */ ; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Restart Vector (0x0) +VECTOR res_service ; 0x0, Reset Vector (0x0) VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) @@ -155,13 +155,9 @@ int2_saved_reg: ; --------------------------------------------- .section .text, "ax",@progbits -res_service: ; processor restart - flag 0x1 ; not implemented - nop - nop -reserved: ; processor restart - rtie ; jump to processor initializations +reserved: + flag 1 ; Unexpected event, halt ;##################### Interrupt Handling ############################## diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 812f95e6ae69..e7fa703c8d5e 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -50,28 +50,37 @@ .endm .section .init.text, "ax",@progbits - .type stext, @function - .globl stext -stext: - ;------------------------------------------------------------------- - ; Don't clobber r0-r2 yet. It might have bootloader provided info - ;------------------------------------------------------------------- + +;---------------------------------------------------------------- +; Default Reset Handler (jumped into from Reset vector) +; - Don't clobber r0,r1,r2 as they might have u-boot provided args +; - Platforms can override this weak version if needed +;---------------------------------------------------------------- +WEAK(res_service) + j stext +END(res_service) + +;---------------------------------------------------------------- +; Kernel Entry point +;---------------------------------------------------------------- +ENTRY(stext) CPU_EARLY_SETUP #ifdef CONFIG_SMP - ; Ensure Boot (Master) proceeds. Others wait in platform dependent way - ; IDENTITY Reg [ 3 2 1 0 ] - ; (cpu-id) ^^^ => Zero for UP ARC700 - ; => #Core-ID if SMP (Master 0) - ; Note that non-boot CPUs might not land here if halt-on-reset and - ; instead breath life from @first_lines_of_secondary, but we still - ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 - mov.ne r0, r5 - jne arc_platform_smp_wait_to_boot + mov.nz r0, r5 +#ifdef CONFIG_ARC_SMP_HALT_ON_RESET + ; Non-Master can proceed as system would be booted sufficiently + jnz first_lines_of_secondary +#else + ; Non-Masters wait for Master to boot enough and bring them up + jnz arc_platform_smp_wait_to_boot #endif + ; Master falls thru +#endif + ; Clear BSS before updating any globals ; XXX: use ZOL here mov r5, __bss_start @@ -102,16 +111,14 @@ stext: GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base(output) j start_kernel ; "C" entry point +END(stext) #ifdef CONFIG_SMP ;---------------------------------------------------------------- ; First lines of code run by secondary before jumping to 'C' ;---------------------------------------------------------------- .section .text, "ax",@progbits - .type first_lines_of_secondary, @function - .globl first_lines_of_secondary - -first_lines_of_secondary: +ENTRY(first_lines_of_secondary) CPU_EARLY_SETUP @@ -126,5 +133,5 @@ first_lines_of_secondary: GET_TSK_STACK_BASE r0, sp j start_kernel_secondary - +END(first_lines_of_secondary) #endif diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index dd35bde39f69..894e696bddaa 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -12,7 +12,7 @@ #include OUTPUT_ARCH(arc) -ENTRY(_stext) +ENTRY(res_service) #ifdef CONFIG_CPU_BIG_ENDIAN jiffies = jiffies_64 + 4; From e0868e6f673d0d2db6a3c3798605e6efb756e61e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 12 Oct 2015 14:58:54 +0530 Subject: [PATCH 24/42] ARC: smp: irqchip: handle IPI as percpu irq like timer The reason this was not done so far was lack of genuine IPI_IRQ for ARC700, as we don't have a SMP version of core yet (which might change soon thx to EZChip). Nevertheles to increase the build coverage, we need to allow CONFIG_SMP for ARC700 and still be able to run it on a UP platform (nsim or AXS101) with a UP Device Tree (SMP-on-UP) The build itself requires some define for IPI_IRQ and even a dummy value is fine since that code won't run anyways. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irq.h | 1 + arch/arc/kernel/intc-compact.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index bc5103637326..4fd7d62a6e30 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -16,6 +16,7 @@ #ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */ #else #define TIMER0_IRQ 16 #define TIMER1_IRQ 17 diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index a1669cf2a277..06bcedf19b62 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -79,17 +79,16 @@ static struct irq_chip onchip_intc = { static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - /* - * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core - * code doesn't own it (like TIMER0). ISS IDU / ezchip define it - * in platform header which can't be included here as it goes - * against multi-platform image philisophy - */ - if (irq == TIMER0_IRQ) + switch (irq) { + case TIMER0_IRQ: +#ifdef CONFIG_SMP + case IPI_IRQ: +#endif irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); - else + break; + default: irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - + } return 0; } From 4c82f28617ab9ce938118f0b99156a96c64d3da0 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Oct 2015 08:48:54 +0530 Subject: [PATCH 25/42] ARC: remove @init_time, @init_irq platform callbacks These are not in use for ARC platforms. Moreover DT mechanims exist to probe them w/o explicit platform calls. - clocksource drivers can use CLOCKSOURCE_OF_DECLARE() - intc IRQCHIP_DECLARE() calls + cascading inside DT allows external intc to be probed automatically Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mach_desc.h | 6 ------ arch/arc/kernel/irq.c | 10 +++++----- arch/arc/kernel/time.c | 3 --- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index e8993a2be6c2..7c680910f104 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -23,11 +23,8 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_irq: setup external IRQ controllers [called from init_IRQ()] * @init_smp: for each CPU (e.g. setup IPI) * [(M):init_IRQ(), (o):start_kernel_secondary()] - * @init_time: platform specific clocksource/clockevent registration - * [called from time_init()] * @init_machine: arch initcall level callback (e.g. populate static * platform devices or parse Devicetree) * @init_late: Late initcall level callback @@ -36,13 +33,10 @@ struct machine_desc { const char *name; const char **dt_compat; - void (*init_early)(void); - void (*init_irq)(void); #ifdef CONFIG_SMP void (*init_smp)(unsigned int); #endif - void (*init_time)(void); void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 2989a7bcf8a8..156489af75e8 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -19,11 +19,11 @@ */ void __init init_IRQ(void) { - /* Any external intc can be setup here */ - if (machine_desc->init_irq) - machine_desc->init_irq(); - - /* process the entire interrupt tree in one go */ + /* + * process the entire interrupt tree in one go + * Any external intc will be setup provided DT chains them + * properly + */ irqchip_init(); #ifdef CONFIG_SMP diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4294761a2b3e..dfad287f1db1 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -285,7 +285,4 @@ void __init time_init(void) /* sets up the periodic event timer */ arc_local_timer_setup(); - - if (machine_desc->init_time) - machine_desc->init_time(); } From e55af4da026ebdb9ded3cb7708b8a8bd7884ad3a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 12 Oct 2015 16:28:55 +0530 Subject: [PATCH 26/42] ARC: smp: Introduce smp hook @init_early_smp for Master core This adds a platform agnostic early SMP init hook which is called on Master core before calling setup_processor() setup_arch() smp_init_cpus() smp_ops.init_early_smp() ... setup_processor() How this helps: - Used for one time init of certain SMP centric IP blocks, before calling setup_processor() which probes various bits of core, possibly including this block - Currently platforms need to call this IP block init from their init routines, which doesn't make sense as this is specific to ARC core and not platform and otherwise requires copy/paste in all (and hence a possible point of failure) e.g. MCIP init is called from 2 platforms currently (axs10x and sim) which will go away once we have this. This change only adds the hooks but they are empty for now. Next commit will populate them and remove the explicit init calls from platforms. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/smp.h | 4 ++++ arch/arc/kernel/setup.c | 3 ++- arch/arc/kernel/smp.c | 12 ++++++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 3845b9e94f69..5ea4cf8cd1a8 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -45,12 +45,16 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP * * @info: SoC SMP specific info for /proc/cpuinfo etc + * @init_early_smp: A SMP specific h/w block can init itself + * Could be common across platforms so not covered by + * mach_desc->init_early() * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received at @irq */ struct plat_smp_ops { const char *info; + void (*init_early_smp)(void); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 68d3e181a82f..c33e77c0ad3e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -411,8 +411,9 @@ void __init setup_arch(char **cmdline_p) if (machine_desc->init_early) machine_desc->init_early(); - setup_processor(); smp_init_cpus(); + + setup_processor(); setup_arch_memory(); /* copy flat DT out of .init and then unflatten it */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f6175fc5f2bb..a59a53f98877 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -42,8 +42,13 @@ void __init smp_prepare_boot_cpu(void) } /* - * Initialise the CPU possible map early - this describes the CPUs - * which may be present or become present in the system. + * Called from setup_arch() before calling setup_processor() + * + * - Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + * - Call early smp init hook. This can initialize a specific multi-core + * IP which is say common to several platforms (hence not part of + * platform specific int_early() hook) */ void __init smp_init_cpus(void) { @@ -51,6 +56,9 @@ void __init smp_init_cpus(void) for (i = 0; i < NR_CPUS; i++) set_cpu_possible(i, true); + + if (plat_smp_ops.init_early_smp) + plat_smp_ops.init_early_smp(); } /* called from init ( ) => process 1 */ From 26b8f996239884451aeb1213747e3ca808c26024 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 12 Oct 2015 16:38:07 +0530 Subject: [PATCH 27/42] ARCv2: smp: [plat-*]: No need to explicitly call mcip_init_early_smp() MCIP now registers it's own probe callback with smp_ops.init_early_smp() which is called by ARC common code, so no need for platforms to do that. This decouples the platforms and MCIP and helps confine MCIP details to it's own file. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mcip.h | 1 - arch/arc/kernel/mcip.c | 15 ++++++++------- arch/arc/plat-axs10x/axs10x.c | 5 ----- arch/arc/plat-sim/platform.c | 1 - 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 52c11f0bb0e5..c9b2b402a810 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -86,7 +86,6 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, __mcip_cmd(cmd, param); } -extern void mcip_init_early_smp(void); extern void mcip_init_smp(unsigned int cpu); #endif diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index e18d36eb0af6..96b0b62ced65 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -97,13 +97,7 @@ static void mcip_ipi_clear(int irq) #endif } -struct plat_smp_ops plat_smp_ops = { - .info = smp_cpuinfo_buf, - .ipi_send = mcip_ipi_send, - .ipi_clear = mcip_ipi_clear, -}; - -void mcip_init_early_smp(void) +static void mcip_probe_n_setup(void) { struct mcip_bcr { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -142,6 +136,13 @@ void mcip_init_early_smp(void) panic("kernel trying to use non-existent GRTC\n"); } +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .init_early_smp = mcip_probe_n_setup, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 0a77b19e1df8..9ce199963363 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -455,11 +455,6 @@ static void __init axs103_early_init(void) axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card"); axs10x_early_init(); - -#ifdef CONFIG_ARC_MCIP - /* No Hardware init, but filling the smp ops callbacks */ - mcip_init_early_smp(); -#endif } #endif diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index d9e35b4a2f08..e4128cca1b52 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -31,7 +31,6 @@ static const char *simulation_compat[] __initconst = { MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, #ifdef CONFIG_ARC_MCIP - .init_early = mcip_init_early_smp, .init_smp = mcip_init_smp, #endif MACHINE_END From 8721a7f5a6f95c38cacbe1be22c820a7698926ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Oct 2015 15:26:00 +0530 Subject: [PATCH 28/42] ARC: smp: Rename platform hook @init_smp -> @init_cpu_smp This conveys better that it is called for each cpu Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mach_desc.h | 4 ++-- arch/arc/kernel/irq.c | 4 ++-- arch/arc/kernel/smp.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 7c680910f104..6ff657a904b6 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -23,7 +23,7 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_smp: for each CPU (e.g. setup IPI) + * @init_cpu_smp: for each CPU as it is coming up (SMP as well as UP) * [(M):init_IRQ(), (o):start_kernel_secondary()] * @init_machine: arch initcall level callback (e.g. populate static * platform devices or parse Devicetree) @@ -35,7 +35,7 @@ struct machine_desc { const char **dt_compat; void (*init_early)(void); #ifdef CONFIG_SMP - void (*init_smp)(unsigned int); + void (*init_cpu_smp)(unsigned int); #endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 156489af75e8..1dd8f2685afc 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -28,8 +28,8 @@ void __init init_IRQ(void) #ifdef CONFIG_SMP /* Master CPU can initialize it's side of IPI */ - if (machine_desc->init_smp) - machine_desc->init_smp(smp_processor_id()); + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(smp_processor_id()); #endif } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index a59a53f98877..35ee18f1de58 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -131,8 +131,8 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); - if (machine_desc->init_smp) - machine_desc->init_smp(cpu); + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(cpu); arc_local_timer_setup(); From 286130ebf196d9643800977d57bdb7cda266b49e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 14 Oct 2015 14:38:02 +0530 Subject: [PATCH 29/42] ARC: smp: Introduce smp hook @init_irq_cpu called for all cores Note this is not part of platform owned static machine_desc, but more of device owned plat_smp_ops (rather misnamed) which a IPI provider or some such typically defines. This will help us seperate out the IPI registration from platform specific init_cpu_smp() into device specific init_irq_cpu() Signed-off-by: Vineet Gupta --- arch/arc/include/asm/smp.h | 3 +++ arch/arc/kernel/irq.c | 6 +++++- arch/arc/kernel/smp.c | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 5ea4cf8cd1a8..133c867d15af 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -48,6 +48,8 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * @init_early_smp: A SMP specific h/w block can init itself * Could be common across platforms so not covered by * mach_desc->init_early() + * @init_irq_cpu: Called for each core so SMP h/w block driver can do + * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received at @irq @@ -55,6 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq); struct plat_smp_ops { const char *info; void (*init_early_smp)(void); + void (*init_irq_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 1dd8f2685afc..2ee226546c6a 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -10,6 +10,7 @@ #include #include #include +#include /* * Late Interrupt system init called from start_kernel for Boot CPU only @@ -27,7 +28,10 @@ void __init init_IRQ(void) irqchip_init(); #ifdef CONFIG_SMP - /* Master CPU can initialize it's side of IPI */ + /* a SMP H/w block could do IPI IRQ request here */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(smp_processor_id()); + if (machine_desc->init_cpu_smp) machine_desc->init_cpu_smp(smp_processor_id()); #endif diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 35ee18f1de58..580587805fa3 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -131,6 +131,10 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); + /* Some SMP H/w setup - for each cpu */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(cpu); + if (machine_desc->init_cpu_smp) machine_desc->init_cpu_smp(cpu); From aa0efcde45a36d1ea2bc5bde4d47f36ec17502de Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 12 Oct 2015 15:15:48 +0530 Subject: [PATCH 30/42] ARCv2: smp: [plat-*]: No need to explicitly call mcip_init_smp() MCIP now registers it's own per cpu setup routine (for IPI IRQ request) using smp_ops.init_irq_cpu(). So no need for platforms to do that. This now completely decouples platforms from MCIP. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mcip.h | 2 -- arch/arc/kernel/mcip.c | 10 ++-------- arch/arc/plat-axs10x/axs10x.c | 3 --- arch/arc/plat-sim/platform.c | 3 --- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index c9b2b402a810..46f4e5351b2a 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -86,8 +86,6 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, __mcip_cmd(cmd, param); } -extern void mcip_init_smp(unsigned int cpu); - #endif #endif diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 96b0b62ced65..74a9b074ac3e 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -19,14 +19,7 @@ static int idu_detected; static DEFINE_RAW_SPINLOCK(mcip_lock); -/* - * Any SMP specific init any CPU does when it comes up. - * Here we setup the CPU to enable Inter-Processor-Interrupts - * Called for each CPU - * -Master : init_IRQ() - * -Other(s) : start_kernel_secondary() - */ -void mcip_init_smp(unsigned int cpu) +static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); } @@ -139,6 +132,7 @@ static void mcip_probe_n_setup(void) struct plat_smp_ops plat_smp_ops = { .info = smp_cpuinfo_buf, .init_early_smp = mcip_probe_n_setup, + .init_irq_cpu = mcip_setup_per_cpu, .ipi_send = mcip_ipi_send, .ipi_clear = mcip_ipi_clear, }; diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 9ce199963363..1b0f0f458a2b 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -482,9 +482,6 @@ static const char *axs103_compat[] __initconst = { MACHINE_START(AXS103, "axs103") .dt_compat = axs103_compat, .init_early = axs103_early_init, -#ifdef CONFIG_ARC_MCIP - .init_smp = mcip_init_smp, -#endif MACHINE_END /* diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index e4128cca1b52..dde692812bc1 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -30,7 +30,4 @@ static const char *simulation_compat[] __initconst = { MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, -#ifdef CONFIG_ARC_MCIP - .init_smp = mcip_init_smp, -#endif MACHINE_END From 483bcc99c0a349570b30fc9cb20dea20c9387a4b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 15 Oct 2015 20:32:39 +0530 Subject: [PATCH 31/42] ARC: boot: Non Master cpus only need to call EARLY_CPU_SETUP once With prev fixes, all cores now start via common entry point @stext which already calls EARLY_CPU_SETUP for all cores - so no need to invoke it again Signed-off-by: Vineet Gupta --- arch/arc/kernel/head.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index e7fa703c8d5e..689dd867fdff 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -120,8 +120,6 @@ END(stext) .section .text, "ax",@progbits ENTRY(first_lines_of_secondary) - CPU_EARLY_SETUP - ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] SET_CURR_TASK_ON_CPU r0, r1 From f759ee57b205223e98fdc97e26cbef305b8048e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 23 Jan 2015 18:10:26 +0530 Subject: [PATCH 32/42] ARC: Ensure DT mem base is same as what kernel is built with Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axc001.dtsi | 2 +- arch/arc/boot/dts/axc003.dtsi | 2 +- arch/arc/boot/dts/axc003_idu.dtsi | 2 +- arch/arc/boot/dts/skeleton.dtsi | 2 +- arch/arc/boot/dts/vdk_axc003.dtsi | 2 +- arch/arc/boot/dts/vdk_axc003_idu.dtsi | 2 +- arch/arc/mm/init.c | 2 ++ 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index a5e2726a067e..420dcfde289f 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -95,6 +95,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 846481f37eef..f90fadf7f94e 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -98,6 +98,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 2f0b33257db2..06a9f294a2e6 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -121,6 +121,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index a870bdd5e404..296d371a335c 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -32,6 +32,6 @@ memory { device_type = "memory"; - reg = <0x00000000 0x10000000>; /* 256M */ + reg = <0x80000000 0x10000000>; /* 256M */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index 9393fd902f0d..84226bd48baf 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -56,6 +56,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 9bee8ed09eb0..31f0fb5fc91d 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -71,6 +71,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index d44eedd8c322..5121b9b16ba8 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -43,6 +43,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) { arc_mem_sz = size & PAGE_MASK; pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + + BUG_ON(base != CONFIG_LINUX_LINK_BASE); } #ifdef CONFIG_BLK_DEV_INITRD From 9acdc911b55569145034b01075adf658891afbd2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 26 Oct 2015 10:52:57 +0530 Subject: [PATCH 33/42] MAINTAINERS: Add public mailing list for ARC Cc: #3.9+ Signed-off-by: Vineet Gupta --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..08adb4a5c49c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10069,6 +10069,7 @@ F: include/net/switchdev.h SYNOPSYS ARC ARCHITECTURE M: Vineet Gupta +L: linux-snps-arc@lists.infraded.org S: Supported F: arch/arc/ F: Documentation/devicetree/bindings/arc/* From 8840e14cd82d398d348b2947fad3a630e93260ba Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Oct 2015 12:11:38 +0530 Subject: [PATCH 34/42] ARC: mm: Improve Duplicate PD Fault handler - Move the verbosity knob from .data to .bss by using inverted logic - No need to readout PD1 descriptor - clip the non pfn bits of PD0 to avoid clipping inside the loop Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 2a30c91f7977..5dcae21dd8dc 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -856,15 +856,15 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - int set, way, n; - unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int pd0[mmu->ways], pd1[mmu->ways]; + unsigned int pd0[mmu->ways]; + unsigned long flags; + int set; local_irq_save(flags); @@ -874,14 +874,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { + int is_valid, way; + /* read out all the ways of current set */ for (way = 0, is_valid = 0; way < mmu->ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); pd0[way] = read_aux_reg(ARC_REG_TLBPD0); - pd1[way] = read_aux_reg(ARC_REG_TLBPD1); is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; } /* If all the WAYS in SET are empty, skip to next SET */ @@ -890,30 +892,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* Scan the set for duplicate ways: needs a nested loop */ for (way = 0; way < mmu->ways - 1; way++) { + + int n; + if (!pd0[way]) continue; for (n = way + 1; n < mmu->ways; n++) { - if ((pd0[way] & PAGE_MASK) == - (pd0[n] & PAGE_MASK)) { + if (pd0[way] != pd0[n]) + continue; - if (dup_pd_verbose) { - pr_info("Duplicate PD's @" - "[%d:%d]/[%d:%d]\n", - set, way, set, n); - pr_info("TLBPD0[%u]: %08x\n", - way, pd0[way]); - } + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); - /* - * clear entry @way and not @n. This is - * critical to our optimised loop - */ - pd0[way] = pd1[way] = 0; - write_aux_reg(ARC_REG_TLBINDEX, + /* + * clear entry @way and not @n. + * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); - __tlb_entry_erase(); - } + __tlb_entry_erase(); } } } From d40846457fc23cd841a60fdc2786e08a8bedb35b Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 2 Sep 2015 20:43:30 +0300 Subject: [PATCH 35/42] ARC: mm: use generic macros _BITUL()/_AC() Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 11 ++++------- arch/arc/include/uapi/asm/page.h | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 336267f2e9d9..f7c7273cd537 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -38,6 +38,7 @@ #include #include #include +#include /************************************************************************** * Page Table Flags @@ -207,13 +208,9 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#ifdef __ASSEMBLY__ -#define PTRS_PER_PTE (1 << BITS_FOR_PTE) -#define PTRS_PER_PGD (1 << BITS_FOR_PGD) -#else -#define PTRS_PER_PTE (1UL << BITS_FOR_PTE) -#define PTRS_PER_PGD (1UL << BITS_FOR_PGD) -#endif +#define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) +#define PTRS_PER_PGD _BITUL(BITS_FOR_PGD) + /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 9d129a2a1351..059aff38f10a 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -9,6 +9,8 @@ #ifndef _UAPI__ASM_ARC_PAGE_H #define _UAPI__ASM_ARC_PAGE_H +#include + /* PAGE_SHIFT determines the page size */ #if defined(CONFIG_ARC_PAGE_SIZE_16K) #define PAGE_SHIFT 14 @@ -25,13 +27,8 @@ #define PAGE_SHIFT 13 #endif -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define PAGE_OFFSET (0x80000000) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */ -#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */ -#endif +#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ +#define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwrds */ #define PAGE_MASK (~(PAGE_SIZE-1)) From 336e2136e1353db8e9e731c27381ee0735656a8a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Mar 2015 17:06:31 +0530 Subject: [PATCH 36/42] ARC: mm: preps ahead of HIGHMEM support Before we plug in highmem support, some of code needs to be ready for it - copy_user_highpage() needs to be using the kmap_atomic API - mk_pte() can't assume page_address() - do_page_fault() can't assume VMALLOC_END is end of kernel vaddr space Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 9 +-------- arch/arc/mm/cache.c | 16 +++++++++++----- arch/arc/mm/fault.c | 13 ++++++++++--- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f7c7273cd537..bd771351a1d1 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -270,13 +270,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) (unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \ PAGE_SHIFT))) -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - pte_val(pte) = __pa(page_address(page)) + pgprot_val(pgprot); \ - pte; \ -}) - +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) #define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -360,7 +354,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) #endif -extern void paging_init(void); extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index ae3b772ecc4d..521fb2bf90bd 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -806,8 +806,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - unsigned long kfrom = (unsigned long)page_address(from); - unsigned long kto = (unsigned long)page_address(to); + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); int clean_src_k_mappings = 0; /* @@ -817,13 +817,16 @@ void copy_user_highpage(struct page *to, struct page *from, * * Note that while @u_vaddr refers to DST page's userspace vaddr, it is * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 */ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); + __flush_dcache_page((unsigned long)kfrom, u_vaddr); clean_src_k_mappings = 1; } - copy_page((void *)kto, (void *)kfrom); + copy_page(kto, kfrom); /* * Mark DST page K-mapping as dirty for a later finalization by @@ -840,11 +843,14 @@ void copy_user_highpage(struct page *to, struct page *from, * sync the kernel mapping back to physical page */ if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); set_bit(PG_dc_clean, &from->flags); } else { clear_bit(PG_dc_clean, &from->flags); } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); } void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d948e4e9d89c..af63f4a13e60 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -18,7 +18,14 @@ #include #include -static int handle_vmalloc_fault(unsigned long address) +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(address); + if (address >= VMALLOC_START) { + ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else From 6101be5ad439806c70b54bdd083e7db9e3affb3d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 28 Oct 2015 18:48:17 +0530 Subject: [PATCH 37/42] ARC: mm: preps ahead of HIGHMEM support #2 Explicit'ify that all memory added so far is low memory Nothing semantical Signed-off-by: Vineet Gupta --- arch/arc/mm/init.c | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 5121b9b16ba8..a726a229baf6 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -24,16 +24,16 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -/* Default tot mem from .config */ -static unsigned long arc_mem_sz = 0x20000000; /* some default */ +static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static unsigned long low_mem_sz; /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { - arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + low_mem_sz = memparse(str, NULL) & PAGE_MASK; /* early console might not be setup yet - it will show up later */ - pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); return 0; } @@ -41,10 +41,10 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - arc_mem_sz = size & PAGE_MASK; - pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + low_mem_sz = size; + BUG_ON(base != low_mem_start); - BUG_ON(base != CONFIG_LINUX_LINK_BASE); + pr_info("Memory @ %llx of %ldM\n", base, TO_MB(size)); } #ifdef CONFIG_BLK_DEV_INITRD @@ -74,46 +74,34 @@ early_param("initrd", early_initrd); void __init setup_arch_memory(void) { unsigned long zones_size[MAX_NR_ZONES]; - unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; init_mm.start_code = (unsigned long)_text; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; - /* - * We do it here, so that memory is correctly instantiated - * even if "mem=xxx" cmline over-ride is given and/or - * DT has memory node. Each causes an update to @arc_mem_sz - * and we finally add memory one here - */ - memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); - - /*------------- externs in mm need setting up ---------------*/ - /* first page of system - kernel .vector starts here */ min_low_pfn = ARCH_PFN_OFFSET; - /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ - max_low_pfn = max_pfn = PFN_DOWN(end_mem); + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); max_mapnr = max_low_pfn - min_low_pfn; - /*------------- reserve kernel image -----------------------*/ - memblock_reserve(CONFIG_LINUX_LINK_BASE, - __pa(_end) - CONFIG_LINUX_LINK_BASE); + /*------------- bootmem allocator setup -----------------------*/ + memblock_add(low_mem_start, low_mem_sz); + memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); #ifdef CONFIG_BLK_DEV_INITRD - /*------------- reserve initrd image -----------------------*/ if (initrd_start) memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif memblock_dump_all(); - /*-------------- node setup --------------------------------*/ + /*----------------- node/zones setup --------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_mapnr; + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; /* * We can't use the helper free_area_init(zones[]) because it uses From 45890f6d34e70d9dd194bd1729eba3ff72cabf78 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Mar 2015 18:53:49 +0530 Subject: [PATCH 38/42] ARC: mm: HIGHMEM: kmap API implementation Implement kmap* API for ARC. This enables - permanent kernel maps (pkmaps): :kmap() API - fixmap : kmap_atomic() We use a very simple/uniform approach for both (unlike some of the other arches). So fixmap doesn't use the customary compile time address stuff. The important semantic is sleep'ability (pkmap) vs. not (fixmap) which the API guarantees. Note that this patch only enables highmem for subsequent PAE40 support as there is no real highmem for ARC in pure 32-bit paradigm as explained below. ARC has 2:2 address split of the 32-bit address space with lower half being translated (virtual) while upper half unstranslated (0x8000_0000 to 0xFFFF_FFFF). kernel itself is linked at base of unstranslated space (i.e. 0x8000_0000 onwards), which is mapped to say DDR 0x0 by external Bus Glue logic (outside the core). So kernel can potentially access 1.75G worth of memory directly w/o need for highmem. (the top 256M is taken by uncached peripheral space from 0xF000_0000 to 0xFFFF_FFFF) In PAE40, hardware can address memory beyond 4G (0x1_0000_0000) while the logical/virtual addresses remain 32-bits. Thus highmem is required for kernel proper to be able to access these pages for it's own purposes (user space is agnostic to this anyways). Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 7 ++ arch/arc/include/asm/highmem.h | 61 +++++++++++++ arch/arc/include/asm/kmap_types.h | 18 ++++ arch/arc/include/asm/processor.h | 7 +- arch/arc/mm/Makefile | 1 + arch/arc/mm/highmem.c | 140 ++++++++++++++++++++++++++++++ 6 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 arch/arc/include/asm/highmem.h create mode 100644 arch/arc/include/asm/kmap_types.h create mode 100644 arch/arc/mm/highmem.c diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index cc938967282b..fd9632f4ddc8 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -446,6 +446,13 @@ config LINUX_LINK_BASE Linux needs to be scooted a bit. If you don't know what the above means, leave this setting alone. +config HIGHMEM + bool "High Memory Support" + help + With ARC 2G:2G address split, only upper 2G is directly addressable by + kernel. Enable this to potentially allow access to rest of 2G and PAE + in future + config ARC_CURR_IN_REG bool "Dedicate Register r25 for current_task pointer" default y diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h new file mode 100644 index 000000000000..b1585c96324a --- /dev/null +++ b/arch/arc/include/asm/highmem.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef CONFIG_HIGHMEM + +#include +#include + +/* start after vmalloc area */ +#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) +#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */ +#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) +#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +/* start after fixmap area */ +#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) +#define PKMAP_SIZE PGDIR_SIZE +#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) + +#define kmap_prot PAGE_KERNEL + + +#include + +extern void *kmap(struct page *page); +extern void *kmap_high(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void __kunmap_atomic(void *kvaddr); +extern void kunmap_high(struct page *page); + +extern void kmap_init(void); + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} + +static inline void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + + +#endif + +#endif diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h new file mode 100644 index 000000000000..f0d7f6acea4e --- /dev/null +++ b/arch/arc/include/asm/kmap_types.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* + * We primarily need to define KM_TYPE_NR here but that in turn + * is a function of PGDIR_SIZE etc. + * To avoid circular deps issue, put everything in asm/highmem.h + */ +#endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index ee682d8e0213..44545354e9e8 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -114,7 +114,12 @@ extern unsigned int get_wchan(struct task_struct *p); * ----------------------------------------------------------------------------- */ #define VMALLOC_START 0x70000000 -#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START) + +/* + * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter + * See asm/highmem.h for details + */ +#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) #define USER_KERNEL_GUTTER 0x10000000 diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index 7beb941556c3..3703a4969349 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -8,3 +8,4 @@ obj-y := extable.o ioremap.o dma.o fault.o init.o obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 000000000000..065ee6bfa82a --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence refered to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE + * slots across NR_CPUS would be more than sufficient (generic code defines + * KM_TYPE_NR as 20). + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; +static pte_t * fixmap_page_table; + +void *kmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return page_address(page); + + return kmap_high(page); +} + +void *kmap_atomic(struct page *page) +{ + int idx, cpu_idx; + unsigned long vaddr; + + preempt_disable(); + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + cpu_idx = kmap_atomic_idx_push(); + idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + vaddr = FIXMAP_ADDR(idx); + + set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, + mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kv) +{ + unsigned long kvaddr = (unsigned long)kv; + + if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { + + /* + * Because preemption is disabled, this vaddr can be associated + * with the current allocated index. + * But in case of multiple live kmap_atomic(), it still relies on + * callers to unmap in right order. + */ + int cpu_idx = kmap_atomic_idx(); + int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + + WARN_ON(kvaddr != FIXMAP_ADDR(idx)); + + pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); + local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr) +{ + pgd_t *pgd_k; + pud_t *pud_k; + pmd_t *pmd_k; + pte_t *pte_k; + + pgd_k = pgd_offset_k(kvaddr); + pud_k = pud_offset(pgd_k, kvaddr); + pmd_k = pmd_offset(pud_k, kvaddr); + + pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + + BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); +} From 29e332261d2ae0900e3befffd90cd70594cd7a84 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 28 Oct 2015 19:06:10 +0530 Subject: [PATCH 39/42] ARC: mm: HIGHMEM: populate high memory from DT Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/nsim_hs.dts | 12 +++++- arch/arc/mm/init.c | 70 ++++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 911f069e0540..b0eb0e7fe21d 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -11,8 +11,16 @@ / { compatible = "snps,nsim_hs"; + #address-cells = <2>; + #size-cells = <2>; interrupt-parent = <&core_intc>; + memory { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + chosen { bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; }; @@ -26,8 +34,8 @@ #address-cells = <1>; #size-cells = <1>; - /* child and parent address space 1:1 mapped */ - ranges; + /* only perip space at end of low mem accessible */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; core_intc: core-interrupt-controller { compatible = "snps,archs-intc"; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index a726a229baf6..a9305b5a2cd4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -15,6 +15,7 @@ #endif #include #include +#include #include #include #include @@ -27,6 +28,12 @@ EXPORT_SYMBOL(empty_zero_page); static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; static unsigned long low_mem_sz; +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn; +static u64 high_mem_start; +static u64 high_mem_sz; +#endif + /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { @@ -41,10 +48,22 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - low_mem_sz = size; - BUG_ON(base != low_mem_start); + int in_use = 0; - pr_info("Memory @ %llx of %ldM\n", base, TO_MB(size)); + if (!low_mem_sz) { + BUG_ON(base != low_mem_start); + low_mem_sz = size; + in_use = 1; + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); } #ifdef CONFIG_BLK_DEV_INITRD @@ -74,6 +93,7 @@ early_param("initrd", early_initrd); void __init setup_arch_memory(void) { unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zones_holes[MAX_NR_ZONES]; init_mm.start_code = (unsigned long)_text; init_mm.end_code = (unsigned long)_etext; @@ -86,9 +106,26 @@ void __init setup_arch_memory(void) /* Last usable page of low mem */ max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); - max_mapnr = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + min_high_pfn = PFN_DOWN(high_mem_start); + max_pfn = PFN_DOWN(high_mem_start + high_mem_sz); +#endif + + max_mapnr = max_pfn - min_low_pfn; /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + memblock_add(low_mem_start, low_mem_sz); memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); @@ -101,7 +138,17 @@ void __init setup_arch_memory(void) /*----------------- node/zones setup --------------------------*/ memset(zones_size, 0, sizeof(zones_size)); + memset(zones_holes, 0, sizeof(zones_holes)); + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_holes[ZONE_NORMAL] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; + + /* This handles the peripheral address space hole */ + zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn; +#endif /* * We can't use the helper free_area_init(zones[]) because it uses @@ -112,9 +159,12 @@ void __init setup_arch_memory(void) free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ min_low_pfn, /* first pfn of node */ - NULL); /* NO holes */ + zones_holes); /* holes */ - high_memory = (void *)end_mem; +#ifdef CONFIG_HIGHMEM + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +#endif } /* @@ -125,6 +175,14 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + reset_all_zones_managed_pages(); + for (tmp = min_high_pfn; tmp < max_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif + free_all_bootmem(); mem_init_print_info(NULL); } From 28b4af729fc4f7ee748c4bccb50ba5a6066418eb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 14 Sep 2015 18:43:42 -0700 Subject: [PATCH 40/42] ARC: mm: PAE40: switch to using phys_addr_t for physical addresses That way a single flip of phys_addr_t to 64 bit ensures all places dealing with physical addresses get correct data Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cacheflush.h | 8 ++++---- arch/arc/mm/cache.c | 26 +++++++++++++------------- arch/arc/mm/tlb.c | 10 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index 0992d3dbcc65..fbe3587c4f36 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -31,10 +31,10 @@ void flush_cache_all(void); -void flush_icache_range(unsigned long start, unsigned long end); -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); -void __inv_icache_page(unsigned long paddr, unsigned long vaddr); -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr); +void flush_icache_range(unsigned long kstart, unsigned long kend); +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len); +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr); +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 521fb2bf90bd..91e28d791d56 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -25,7 +25,7 @@ static int l2_line_sz; int ioc_exists; volatile int slc_enable = 1, ioc_enable = 1; -void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop); void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz); @@ -216,7 +216,7 @@ slc_chk: */ static inline -void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd; @@ -254,7 +254,7 @@ void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, } static inline -void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; @@ -308,7 +308,7 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, * specified in PTAG (similar to MMU v3) */ static inline -void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop) { unsigned int aux_cmd; @@ -412,7 +412,7 @@ static inline void __dc_entire_op(const int op) /* * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned long flags; @@ -445,7 +445,7 @@ static inline void __ic_entire_inv(void) } static inline void -__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; @@ -462,7 +462,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, #else struct ic_inv_args { - unsigned long paddr, vaddr; + phys_addr_t paddr, vaddr; int sz; }; @@ -473,7 +473,7 @@ static void __ic_line_inv_vaddr_helper(void *info) __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); } -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { struct ic_inv_args ic_inv = { @@ -494,7 +494,7 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -584,7 +584,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - unsigned long paddr = (unsigned long)page_address(page); + phys_addr_t paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -732,14 +732,14 @@ EXPORT_SYMBOL(flush_icache_range); * builtin kernel page will not have any virtual mappings. * kprobe on loadable module will be kernel vaddr. */ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) { __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); __ic_line_inv_vaddr(paddr, vaddr, len); } /* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) { __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } @@ -748,7 +748,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 5dcae21dd8dc..b5f28dc0f924 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -499,7 +499,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; @@ -535,9 +535,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - address &= PAGE_MASK; + vaddr &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); @@ -547,7 +547,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* * ARC MMU provides fully orthogonal access bits for K/U mode, @@ -583,7 +583,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); From 25d464183ca3522ae27ec1bbef5ddcbbbef65017 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 5 Oct 2015 15:41:36 +0530 Subject: [PATCH 41/42] ARC: mm: PAE40: tlbex.S: Explicitify the size of pte_t Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 552594897655..8d1b81990bc1 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -221,12 +221,15 @@ ex_saved_reg1: ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = pgtbl[y] - ; To avoid the multiply by in end, we do the -2, <<2 below + ; (3) z = (pgtbl + y * 4) - lsr r0, r2, (PAGE_SHIFT - 2) - and r0, r0, ( (PTRS_PER_PTE - 1) << 2) - ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE + ; r1: PTE ptr 2: @@ -247,15 +250,15 @@ ex_saved_reg1: ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only) + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page) + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE or r3, r3, r2 - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb From 5a364c2a1762e8a78721fafc93144509c0b6cb84 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 6 Feb 2015 18:44:57 +0300 Subject: [PATCH 42/42] ARC: mm: PAE40 support This is the first working implementation of 40-bit physical address extension on ARCv2. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 15 ++++++++++++ arch/arc/include/asm/cache.h | 2 ++ arch/arc/include/asm/mmu.h | 7 ++++++ arch/arc/include/asm/page.h | 4 ++++ arch/arc/include/asm/pgalloc.h | 6 ++--- arch/arc/include/asm/pgtable.h | 8 ++++++- arch/arc/mm/cache.c | 44 ++++++++++++++++++++++++++++++---- arch/arc/mm/tlb.c | 27 +++++++++++++++++---- arch/arc/mm/tlbex.S | 11 ++++++++- 9 files changed, 110 insertions(+), 14 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fd9632f4ddc8..2c2ac3f3ff80 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -453,6 +453,21 @@ config HIGHMEM kernel. Enable this to potentially allow access to rest of 2G and PAE in future +config ARC_HAS_PAE40 + bool "Support for the 40-bit Physical Address Extension" + default n + depends on ISA_ARCV2 + select HIGHMEM + help + Enable access to physical memory beyond 4G, only supported on + ARC cores with 40 bit Physical Addressing support + +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARC_HAS_PAE40 + +config ARCH_DMA_ADDR_T_64BIT + bool + config ARC_CURR_IN_REG bool "Dedicate Register r25 for current_task pointer" default y diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e23ea6e7633a..abf06e81c929 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -65,6 +65,7 @@ extern int ioc_exists; #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E #endif +#define ARC_REG_IC_PTAG_HI 0x1F /* Bit val in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE 0x1 @@ -77,6 +78,7 @@ extern int ioc_exists; #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C #define ARC_REG_DC_PTAG 0x5C +#define ARC_REG_DC_PTAG_HI 0x5F /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 0f9c3eb5327e..b144d7ca7d20 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -24,6 +24,7 @@ #if (CONFIG_ARC_MMU_VER < 4) #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ #define ARC_REG_TLBINDEX 0x407 #define ARC_REG_TLBCOMMAND 0x408 #define ARC_REG_PID 0x409 @@ -31,6 +32,7 @@ #else #define ARC_REG_TLBPD0 0x460 #define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 #define ARC_REG_TLBINDEX 0x464 #define ARC_REG_TLBCOMMAND 0x465 #define ARC_REG_PID 0x468 @@ -83,6 +85,11 @@ void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); void read_decode_mmu_bcr(void); +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 37706837ef75..429957f1c236 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -56,7 +56,11 @@ typedef struct { #else /* !STRICT_MM_TYPECHECKS */ +#ifdef CONFIG_ARC_HAS_PAE40 +typedef unsigned long long pte_t; +#else typedef unsigned long pte_t; +#endif typedef unsigned long pgd_t; typedef unsigned long pgprot_t; diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 9149b5ca26d7..86ed671286df 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) static inline int __get_order_pgd(void) { - return get_order(PTRS_PER_PGD * 4); + return get_order(PTRS_PER_PGD * sizeof(pgd_t)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline int __get_order_pte(void) { - return get_order(PTRS_PER_PTE * 4); + return get_order(PTRS_PER_PTE * sizeof(pte_t)); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -110,7 +110,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); if (!pte_pg) return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * 4); + memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); page = virt_to_page(pte_pg); if (!pgtable_page_ctor(page)) { __free_page(page); diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index bd771351a1d1..57af2f05ae84 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -134,7 +134,12 @@ /* Masks for actual TLB "PD"s */ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#endif /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) @@ -272,7 +277,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \ + pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 91e28d791d56..ff7ff6cbb811 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, } } +/* + * For ARC700 MMUv3 I-cache and D-cache flushes + * Also reused for HS38 aliasing I-cache configuration + */ static inline void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) @@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, if (full_page) write_aux_reg(aux_tag, paddr); + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuratino with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + while (num_lines-- > 0) { if (!full_page) { write_aux_reg(aux_tag, paddr); @@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, } /* - * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache - * maintenance ops (in IVIL reg), as long as icache doesn't alias. + * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT + * Here's how cache ops are implemented * - * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is - * specified in PTAG (similar to MMU v3) + * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) + * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) + * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG + * respectively, similar to MMU v3 programming model, hence + * __cache_line_loop_v3() is used) + * + * If PAE40 is enabled, independent of aliasing considerations, the higher bits + * needs to be written into PTAG_HI */ static inline void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, @@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (cacheop == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + while (num_lines-- > 0) { write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index b5f28dc0f924..0ee739846847 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } @@ -182,7 +186,7 @@ static void utlb_invalidate(void) } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { unsigned int idx; @@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { write_aux_reg(ARC_REG_TLBPD0, pd0); write_aux_reg(ARC_REG_TLBPD1, pd1); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); } @@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void) /* Load PD0 and PD1 with template for a Blank Entry */ write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); for (entry = 0; entry < num_tlb; entry++) { @@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; - unsigned long pd0, pd1; + unsigned long pd0; + pte_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, - p_mmu->u_dtlb, p_mmu->u_itlb); + p_mmu->u_dtlb, p_mmu->u_itlb, + IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } @@ -821,6 +835,9 @@ void arc_mmu_init(void) panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 8d1b81990bc1..63860adc4814 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -223,12 +223,16 @@ ex_saved_reg1: ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index ; (3) z = (pgtbl + y * 4) +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else #define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif ; multiply in step (3) above avoided by shifting lesser in step (1) lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) - ld.aw r0, [r1, r0] ; r0: PTE + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) ; r1: PTE ptr 2: @@ -247,6 +251,7 @@ ex_saved_reg1: ;----------------------------------------------------------------- ; Convert Linux PTE entry into TLB entry ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB @@ -259,6 +264,10 @@ ex_saved_reg1: or r3, r3, r2 sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb