From 21e450d21ccad4cb7c7984c29ff145012b47736d Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Tue, 18 Jun 2019 09:32:11 +0200
Subject: [PATCH 1/8] x86/mm: Avoid redundant interrupt disable in load_mm_cr4()

load_mm_cr4() is always called with interrupts disabled from:

 - switch_mm_irqs_off()
 - refresh_pce(), which is an on_each_cpu() callback

Thus, disabling interrupts in cr4_set/clear_bits() is redundant.

Implement cr4_set/clear_bits_irqsoff() helpers, rename load_mm_cr4() to
load_mm_cr4_irqsoff() and use the new helpers. The new helpers do not
need a lockdep assert as __cr4_set() has one already.

The renaming, in combination with the checks in __cr4_set(), ensures that
any changes in the boundary conditions at the call sites will be detected.

[ tglx: Massaged change log ]

Signed-off-by: Jan Kiszka
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/0fbbcb64-5f26-4ffb-1bb9-4f5f48426893@siemens.com
---
 arch/x86/events/core.c             |  2 +-
 arch/x86/include/asm/mmu_context.h |  8 ++++----
 arch/x86/include/asm/tlbflush.h    | 30 +++++++++++++++++++++++-------
 arch/x86/mm/tlb.c                  |  2 +-
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81b005e4c7d9..cfe256ca76df 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2087,7 +2087,7 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
+	load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9024236693d2..16ae821483c8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -28,16 +28,16 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 
 DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
-static inline void load_mm_cr4(struct mm_struct *mm)
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
 {
 	if (static_branch_unlikely(&rdpmc_always_available_key) ||
 	    atomic_read(&mm->context.perf_rdpmc_allowed))
-		cr4_set_bits(X86_CR4_PCE);
+		cr4_set_bits_irqsoff(X86_CR4_PCE);
 	else
-		cr4_clear_bits(X86_CR4_PCE);
+		cr4_clear_bits_irqsoff(X86_CR4_PCE);
 }
 #else
-static inline void load_mm_cr4(struct mm_struct *mm) {}
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
 #endif
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee375831962..6f66d841262d 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4)
 }
 
 /* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqsoff(unsigned long mask)
 {
-	unsigned long cr4, flags;
+	unsigned long cr4;
 
-	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 | mask) != cr4)
 		__cr4_set(cr4 | mask);
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits_irqsoff(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4)
+		__cr4_set(cr4 & ~mask);
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cr4_set_bits_irqsoff(mask);
 	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
-	unsigned long cr4, flags;
+	unsigned long flags;
 
 	local_irq_save(flags);
-	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 & ~mask) != cr4)
-		__cr4_set(cr4 & ~mask);
+	cr4_clear_bits_irqsoff(mask);
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4de9704c4aaf..e6a9edc5baaf 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -440,7 +440,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 
 	if (next != real_prev) {
-		load_mm_cr4(next);
+		load_mm_cr4_irqsoff(next);
 		switch_ldt(real_prev, next);
 	}
 }
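
The refactoring above follows a common kernel pattern: split a function
that manages its own exclusion (here: interrupt disabling) into a bare
*_irqsoff() core plus a wrapper, so callers that already run with
interrupts off skip the redundant save/restore. A minimal user-space
analogue of the same pattern, using a mutex in place of interrupt state
(hypothetical names, not kernel code):

  #include <pthread.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static unsigned long state;

  /* Core helper: caller must already hold the lock (the analogue of
   * running with interrupts disabled). */
  static void set_bits_locked(unsigned long mask)
  {
          if ((state | mask) != state)
                  state |= mask;
  }

  /* Convenience wrapper for callers that do not hold the lock. */
  static void set_bits(unsigned long mask)
  {
          pthread_mutex_lock(&lock);
          set_bits_locked(mask);
          pthread_mutex_unlock(&lock);
  }

  int main(void)
  {
          set_bits(0x1);                  /* lock not held: wrapper   */
          pthread_mutex_lock(&lock);
          set_bits_locked(0x2);           /* lock held: bare core     */
          pthread_mutex_unlock(&lock);
          return 0;
  }

In the kernel variant the wrapper uses local_irq_save()/restore(), and
misuse of the bare helper is caught by the lockdep assert in __cr4_set().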
From 2e1da13fba4cb529c2c8c1d9f657690d1e853d7d Mon Sep 17 00:00:00 2001
From: Vlastimil Babka
Date: Wed, 7 Aug 2019 15:02:58 +0200
Subject: [PATCH 2/8] x86/kconfig: Remove X86_DIRECT_GBPAGES dependency on !DEBUG_PAGEALLOC

These days CONFIG_DEBUG_PAGEALLOC just compiles in the code that has to
be enabled at boot time, or with an extra config option, and only then
are the large page based direct mappings disabled. Therefore remove the
config dependency, allowing 1GB direct mappings with debug_pagealloc
compiled in but not enabled.

Signed-off-by: Vlastimil Babka
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190807130258.22185-1-vbabka@suse.cz
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..58eae28c3dd6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1503,7 +1503,7 @@ config X86_5LEVEL
 
 config X86_DIRECT_GBPAGES
 	def_bool y
-	depends on X86_64 && !DEBUG_PAGEALLOC
+	depends on X86_64
 	---help---
 	  Certain kernel features effectively disable kernel
 	  linear 1 GB mappings (even if the CPU otherwise
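
The mechanism this change relies on: with CONFIG_DEBUG_PAGEALLOC compiled
in, the expensive behaviour is still gated behind a runtime check set by
the "debug_pagealloc=on" boot parameter. A simplified sketch of that
gating pattern (not the exact mm/ code, which lives behind
debug_pagealloc_enabled() and arch/x86/mm/init.c):

  /* Set from the "debug_pagealloc=on" boot parameter. */
  static bool debug_pagealloc_on;

  static bool debug_pagealloc_enabled(void)
  {
          return debug_pagealloc_on;
  }

  /* Direct-mapping setup can therefore decide at boot time whether
   * large pages are usable, instead of at compile time: */
  static unsigned long probe_page_size_mask_sketch(void)
  {
          unsigned long mask = 0;

          if (!debug_pagealloc_enabled()) {
                  mask |= 1UL << 21;      /* allow 2M pages */
                  mask |= 1UL << 30;      /* allow 1G pages */
          }
          return mask;
  }

Because the decision is made at runtime, the Kconfig-level dependency on
!DEBUG_PAGEALLOC is no longer needed.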
From c84b82dd3e593db217f23c60f7edae02c76a3c4c Mon Sep 17 00:00:00 2001
From: Cao jin
Date: Fri, 9 Aug 2019 19:46:12 +0800
Subject: [PATCH 3/8] x86/fixmap: Cleanup outdated comments

Remove stale comments and fix the no longer valid pagetable entry
reference.

Signed-off-by: Cao jin
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190809114612.2569-1-caoj.fnst@cn.fujitsu.com
---
 arch/x86/include/asm/fixmap.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9da8cccdf3fb..0c47aa82e2e2 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -42,8 +42,7 @@
  * Because of this, FIXADDR_TOP x86 integration was left as later work.
  */
 #ifdef CONFIG_X86_32
-/* used by vmalloc.c, vsyscall.lds.S.
- *
+/*
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
@@ -120,7 +119,7 @@ enum fixed_addresses {
  * before ioremap() is functional.
  *
  * If necessary we round it up to the next 512 pages boundary so
- * that we can have a single pgd entry and a single pte table:
+ * that we can have a single pmd entry and a single pte table:
  */
 #define NR_FIX_BTMAPS		64
 #define FIX_BTMAPS_SLOTS	8

From ec46133d3b81053701e2a29047dfb6228ff487bd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 26 Aug 2019 09:55:54 +0200
Subject: [PATCH 4/8] x86/mm: Unexport set_memory_x() and set_memory_nx()

No module currently messes with clearing or setting the execute
permission of kernel memory, and none really should.

Signed-off-by: Christoph Hellwig
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190826075558.8125-2-hch@lst.de
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e14e95ea7338..08a6f04a5c6d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1977,7 +1977,6 @@ int set_memory_x(unsigned long addr, int numpages)
 
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
-EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
@@ -1986,7 +1985,6 @@ int set_memory_nx(unsigned long addr, int numpages)
 
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
-EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
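
For context, EXPORT_SYMBOL() is what makes a kernel symbol resolvable by
loadable modules; dropping it keeps the function usable by built-in code
while any out-of-tree module that references it now fails at modpost or
load time with an undefined-symbol error. A hypothetical module of the
kind this change forecloses (illustrative only, and exactly the W^X
pattern modules should not be doing by hand):

  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <asm/set_memory.h>

  static void *buf;

  static int __init demo_init(void)
  {
          buf = vmalloc(PAGE_SIZE);
          if (!buf)
                  return -ENOMEM;
          /* After this patch, this call no longer resolves for
           * modules: set_memory_x() is unexported. */
          return set_memory_x((unsigned long)buf, 1);
  }

  static void __exit demo_exit(void)
  {
          set_memory_nx((unsigned long)buf, 1);
          vfree(buf);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");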
From a919198b97c85e093c81eaae0b4864206ec2fe02 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 26 Aug 2019 09:55:55 +0200
Subject: [PATCH 5/8] x86/mm: Remove the unused set_memory_array_*() functions

Signed-off-by: Christoph Hellwig
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190826075558.8125-3-hch@lst.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/set_memory.h |  5 ---
 arch/x86/mm/pageattr.c            | 75 -------------------------------
 2 files changed, 80 deletions(-)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ae7b909dc242..899ec9ae7cff 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -48,11 +48,6 @@ int set_memory_encrypted(unsigned long addr, int numpages);
 int set_memory_decrypted(unsigned long addr, int numpages);
 int set_memory_np_noalias(unsigned long addr, int numpages);
 
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
-
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
 int set_pages_array_wt(struct page **pages, int addrinarray);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 08a6f04a5c6d..1f97a726d09a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1819,63 +1819,6 @@ out_err:
 }
 EXPORT_SYMBOL(set_memory_uc);
 
-static int _set_memory_array(unsigned long *addr, int numpages,
-		enum page_cache_mode new_type)
-{
-	enum page_cache_mode set_type;
-	int i, j;
-	int ret;
-
-	for (i = 0; i < numpages; i++) {
-		ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
-					new_type, NULL);
-		if (ret)
-			goto out_free;
-	}
-
-	/* If WC, set to UC- first and then WC */
-	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
-				_PAGE_CACHE_MODE_UC_MINUS : new_type;
-
-	ret = change_page_attr_set(addr, numpages,
-				   cachemode2pgprot(set_type), 1);
-
-	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
-		ret = change_page_attr_set_clr(addr, numpages,
-					       cachemode2pgprot(
-						_PAGE_CACHE_MODE_WC),
-					       __pgprot(_PAGE_CACHE_MASK),
-					       0, CPA_ARRAY, NULL);
-	if (ret)
-		goto out_free;
-
-	return 0;
-
-out_free:
-	for (j = 0; j < i; j++)
-		free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
-
-	return ret;
-}
-
-int set_memory_array_uc(unsigned long *addr, int numpages)
-{
-	return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_UC_MINUS);
-}
-EXPORT_SYMBOL(set_memory_array_uc);
-
-int set_memory_array_wc(unsigned long *addr, int numpages)
-{
-	return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WC);
-}
-EXPORT_SYMBOL(set_memory_array_wc);
-
-int set_memory_array_wt(unsigned long *addr, int numpages)
-{
-	return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_memory_array_wt);
-
 int _set_memory_wc(unsigned long addr, int numpages)
 {
 	int ret;
@@ -1952,24 +1895,6 @@ int set_memory_wb(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_wb);
 
-int set_memory_array_wb(unsigned long *addr, int numpages)
-{
-	int i;
-	int ret;
-
-	/* WB cache mode is hard wired to all cache attribute bits being 0 */
-	ret = change_page_attr_clear(addr, numpages,
-			      __pgprot(_PAGE_CACHE_MASK), 1);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < numpages; i++)
-		free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
-
-	return 0;
-}
-EXPORT_SYMBOL(set_memory_array_wb);
-
 int set_memory_x(unsigned long addr, int numpages)
 {
 	if (!(__supported_pte_mask & _PAGE_NX))
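
Should a future caller ever need the removed batch semantics, the
per-range primitives compose. A hedged sketch of an equivalent loop
(hypothetical helper name; note it lacks the CPA_ARRAY batching the
removed code used, so it flushes per range and is correspondingly
slower):

  #include <asm/set_memory.h>

  static int set_memory_uc_each(unsigned long *addrs, int count)
  {
          int i, ret;

          for (i = 0; i < count; i++) {
                  ret = set_memory_uc(addrs[i], 1);
                  if (ret)
                          goto undo;
          }
          return 0;
  undo:
          /* set_memory_wb() is the documented undo for set_memory_uc():
           * it restores WB and releases the memtype reservation. */
          while (--i >= 0)
                  set_memory_wb(addrs[i], 1);
          return ret;
  }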
From 185be15143aa308184310df9fde3d409ca9f83bb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 26 Aug 2019 09:55:56 +0200
Subject: [PATCH 6/8] x86/mm: Remove set_pages_x() and set_pages_nx()

These wrappers don't provide a real benefit over just using set_memory_x()
and set_memory_nx().

Signed-off-by: Christoph Hellwig
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190826075558.8125-4-hch@lst.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/set_memory.h  |  2 --
 arch/x86/kernel/machine_kexec_32.c |  4 ++--
 arch/x86/mm/init_32.c              |  2 +-
 arch/x86/mm/pageattr.c             | 16 ----------------
 4 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 899ec9ae7cff..fd549c3ebb17 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -75,8 +75,6 @@ int set_pages_array_wb(struct page **pages, int addrinarray);
 
 int set_pages_uc(struct page *page, int numpages);
 int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
 int set_pages_ro(struct page *page, int numpages);
 int set_pages_rw(struct page *page, int numpages);
 
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 77854b192fef..7b45e8daad22 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -148,7 +148,7 @@ int machine_kexec_prepare(struct kimage *image)
 {
 	int error;
 
-	set_pages_x(image->control_code_page, 1);
+	set_memory_x((unsigned long)page_address(image->control_code_page), 1);
 	error = machine_kexec_alloc_page_tables(image);
 	if (error)
 		return error;
@@ -162,7 +162,7 @@ int machine_kexec_prepare(struct kimage *image)
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
-	set_pages_nx(image->control_code_page, 1);
+	set_memory_nx((unsigned long)page_address(image->control_code_page), 1);
 	machine_kexec_free_page_tables(image);
 }
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4068abb9427f..930edeb41ec3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -916,7 +916,7 @@ static void mark_nxdata_nx(void)
 
 	if (__supported_pte_mask & _PAGE_NX)
 		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
-	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+	set_memory_nx(start, size >> PAGE_SHIFT);
 }
 
 void mark_rodata_ro(void)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1f97a726d09a..d5586a012745 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -2113,22 +2113,6 @@ int set_pages_array_wb(struct page **pages, int numpages)
 }
 EXPORT_SYMBOL(set_pages_array_wb);
 
-int set_pages_x(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_x(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_x);
-
-int set_pages_nx(struct page *page, int numpages)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return set_memory_nx(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_nx);
-
 int set_pages_ro(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
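
The conversion at the call sites is mechanical: a struct page is turned
into its directly mapped virtual address with page_address(), which is
exactly what the removed wrappers did internally. A hypothetical helper
spelling out the equivalence:

  /* What set_pages_x(page, numpages) used to do internally: */
  static inline int set_page_range_x(struct page *page, int numpages)
  {
          return set_memory_x((unsigned long)page_address(page), numpages);
  }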
From aeb415fbe9f62e6db0fabd2023d39728ccc705fd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 26 Aug 2019 09:55:57 +0200
Subject: [PATCH 7/8] x86/mm: Remove the unused set_memory_wt() function

Signed-off-by: Christoph Hellwig
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190826075558.8125-5-hch@lst.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/set_memory.h |  1 -
 arch/x86/mm/pageattr.c            | 17 -----------------
 2 files changed, 18 deletions(-)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index fd549c3ebb17..2ee8e469dcf5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -40,7 +40,6 @@ int _set_memory_wt(unsigned long addr, int numpages);
 int _set_memory_wb(unsigned long addr, int numpages);
 int set_memory_uc(unsigned long addr, int numpages);
 int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
 int set_memory_wb(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 int set_memory_4k(unsigned long addr, int numpages);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d5586a012745..0d09cc5aad61 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1858,23 +1858,6 @@ int _set_memory_wt(unsigned long addr, int numpages)
 				cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
 }
 
-int set_memory_wt(unsigned long addr, int numpages)
-{
-	int ret;
-
-	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-			      _PAGE_CACHE_MODE_WT, NULL);
-	if (ret)
-		return ret;
-
-	ret = _set_memory_wt(addr, numpages);
-	if (ret)
-		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(set_memory_wt);
-
 int _set_memory_wb(unsigned long addr, int numpages)
 {
 	/* WB cache mode is hard wired to all cache attribute bits being 0 */

From bc04a049f058a472695aa22905d57e2b1f4c77d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 3 Sep 2019 09:53:52 +0200
Subject: [PATCH 8/8] x86/mm: Fix cpumask_of_node() error condition

When CONFIG_DEBUG_PER_CPU_MAPS=y we validate that the @node argument of
cpumask_of_node() is a valid node_id. It however forgets to check for
negative numbers. Fix this by explicitly casting to unsigned int:

  (unsigned)node >= nr_node_ids

verifies:

  0 <= node < nr_node_ids

Also amend the error message to match the condition.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Borislav Petkov
Cc: Yunsheng Lin
Link: https://lkml.kernel.org/r/20190903075352.GY2369@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/numa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e6dad600614c..4123100e0eaf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -861,9 +861,9 @@ void numa_remove_cpu(int cpu)
  */
 const struct cpumask *cpumask_of_node(int node)
 {
-	if (node >= nr_node_ids) {
+	if ((unsigned)node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"cpumask_of_node(%d): node > nr_node_ids(%u)\n",
+			"cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return cpu_none_mask;
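
The cast works because a negative int converted to unsigned wraps to a
very large value, so a single unsigned comparison implements the
two-sided range check. A small stand-alone demonstration of the idiom
(user-space C, hypothetical names):

  #include <assert.h>

  /* (unsigned)x >= n  <=>  !(0 <= x && x < n), for n <= INT_MAX */
  static int valid_node(int node, unsigned int nr_node_ids)
  {
          return (unsigned int)node < nr_node_ids;
  }

  int main(void)
  {
          assert(!valid_node(-1, 4));   /* wraps to 0xffffffff, rejected */
          assert( valid_node(0, 4));
          assert( valid_node(3, 4));
          assert(!valid_node(4, 4));
          return 0;
  }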