Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm changes from Ingo Molnar:
 "The main changes in this cycle were:

   - reduce the x86/32 PAE per task PGD allocation overhead from 4K to
     0.032k (Fenghua Yu)

   - early_ioremap/memunmap() usage cleanups (Juergen Gross)

   - gbpages support cleanups (Luis R Rodriguez)

   - improve AMD Bulldozer (family 0x15) ASLR I$ aliasing workaround to
     increase randomization by 3 bits (per bootup) (Hector
     Marco-Gisbert)

   - misc fixlets"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Improve AMD Bulldozer ASLR workaround
  x86/mm/pat: Initialize __cachemode2pte_tbl[] and __pte2cachemode_tbl[] in a bit more readable fashion
  init.h: Clean up the __setup()/early_param() macros
  x86/mm: Simplify probe_page_size_mask()
  x86/mm: Further simplify 1 GB kernel linear mappings handling
  x86/mm: Use early_param_on_off() for direct_gbpages
  init.h: Add early_param_on_off()
  x86/mm: Simplify enabling direct_gbpages
  x86/mm: Use IS_ENABLED() for direct_gbpages
  x86/mm: Unexport set_memory_ro() and set_memory_rw()
  x86/mm, efi: Use early_ioremap() in arch/x86/platform/efi/efi-bgrt.c
  x86/mm: Use early_memunmap() instead of early_iounmap()
  x86/mm/pat: Ensure different messages in STRICT_DEVMEM and PAT cases
  x86/mm: Reduce PAE-mode per task pgd allocation overhead from 4K to 32 bytes
This commit is contained in:
Linus Torvalds 2015-04-13 12:31:32 -08:00
commit 6cf78d4b37
14 changed files with 189 additions and 89 deletions

View File

@ -1295,14 +1295,14 @@ config ARCH_DMA_ADDR_T_64BIT
def_bool y
depends on X86_64 || HIGHMEM64G
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EXPERT
default y
depends on X86_64
config X86_DIRECT_GBPAGES
def_bool y
depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
---help---
Allow the kernel linear mapping to use 1GB pages on CPUs that
support it. This can improve the kernel's performance a tiny bit by
reducing TLB pressure. If in doubt, say "Y".
Certain kernel features effectively disable kernel
linear 1 GB mappings (even if the CPU otherwise
supports them), so don't confuse the user by printing
that we have them enabled.
# Common NUMA Features
config NUMA

View File

@ -366,6 +366,7 @@ enum align_flags {
struct va_alignment {
int flags;
unsigned long mask;
unsigned long bits;
} ____cacheline_aligned;
extern struct va_alignment va_align;

View File

@ -5,6 +5,7 @@
#include <linux/io.h>
#include <linux/sched.h>
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
va_align.mask = (upperbit - 1) & PAGE_MASK;
va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
/* A random value per boot for bit slice [12:upper_bit) */
va_align.bits = get_random_int() & va_align.mask;
}
}

View File

@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void)
initial_boot_params = dt = early_memremap(initial_dtb, map_len);
size = of_get_flat_dt_size();
if (map_len < size) {
early_iounmap(dt, map_len);
early_memunmap(dt, map_len);
initial_boot_params = dt = early_memremap(initial_dtb, size);
map_len = size;
}
unflatten_and_copy_device_tree();
early_iounmap(dt, map_len);
early_memunmap(dt, map_len);
}
#else
static inline void x86_flattree_get_config(void) { }

View File

@ -661,7 +661,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
early_iounmap(sdata, data_len);
early_memunmap(sdata, data_len);
printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}

View File

@ -354,7 +354,7 @@ static void __init relocate_initrd(void)
mapaddr = ramdisk_image & PAGE_MASK;
p = early_memremap(mapaddr, clen+slop);
memcpy(q, p+slop, clen);
early_iounmap(p, clen+slop);
early_memunmap(p, clen+slop);
q += clen;
ramdisk_image += clen;
ramdisk_size -= clen;
@ -438,7 +438,7 @@ static void __init parse_setup_data(void)
data_len = data->len + sizeof(struct setup_data);
data_type = data->type;
pa_next = data->next;
early_iounmap(data, sizeof(*data));
early_memunmap(data, sizeof(*data));
switch (data_type) {
case SETUP_E820_EXT:
@ -470,7 +470,7 @@ static void __init e820_reserve_setup_data(void)
E820_RAM, E820_RESERVED_KERN);
found = 1;
pa_data = data->next;
early_iounmap(data, sizeof(*data));
early_memunmap(data, sizeof(*data));
}
if (!found)
return;
@ -491,7 +491,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
data = early_memremap(pa_data, sizeof(*data));
memblock_reserve(pa_data, sizeof(*data) + data->len);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
early_memunmap(data, sizeof(*data));
}
}

View File

@ -34,10 +34,26 @@ static unsigned long get_align_mask(void)
return va_align.mask;
}
/*
* To avoid aliasing in the I$ on AMD F15h, the bits defined by the
* va_align.bits, [12:upper_bit), are set to a random value instead of
* zeroing them. This random value is computed once per boot. This form
* of ASLR is known as "per-boot ASLR".
*
* To achieve this, the random value is added to the info.align_offset
* value before calling vm_unmapped_area() or ORed directly to the
* address.
*/
static unsigned long get_align_bits(void)
{
return va_align.bits & get_align_mask();
}
unsigned long align_vdso_addr(unsigned long addr)
{
unsigned long align_mask = get_align_mask();
return (addr + align_mask) & ~align_mask;
addr = (addr + align_mask) & ~align_mask;
return addr | get_align_bits();
}
static int __init control_va_addr_alignment(char *str)
@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = begin;
info.high_limit = end;
info.align_mask = filp ? get_align_mask() : 0;
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
info.align_mask = get_align_mask();
info.align_offset += get_align_bits();
}
return vm_unmapped_area(&info);
}
@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
info.align_mask = filp ? get_align_mask() : 0;
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
info.align_mask = get_align_mask();
info.align_offset += get_align_bits();
}
addr = vm_unmapped_area(&info);
if (!(addr & ~PAGE_MASK))
return addr;

View File

@ -29,29 +29,33 @@
/*
* Tables translating between page_cache_type_t and pte encoding.
* Minimal supported modes are defined statically, modified if more supported
* cache modes are available.
* Index into __cachemode2pte_tbl is the cachemode.
* Index into __pte2cachemode_tbl are the caching attribute bits of the pte
* (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
*
* Minimal supported modes are defined statically, they are modified
* during bootup if more supported cache modes are available.
*
* Index into __cachemode2pte_tbl[] is the cachemode.
*
* Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
* (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
*/
uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
[_PAGE_CACHE_MODE_WB] = 0,
[_PAGE_CACHE_MODE_WC] = _PAGE_PWT,
[_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD,
[_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT,
[_PAGE_CACHE_MODE_WT] = _PAGE_PCD,
[_PAGE_CACHE_MODE_WP] = _PAGE_PCD,
[_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
[_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 ,
[_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
[_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD,
};
EXPORT_SYMBOL(__cachemode2pte_tbl);
uint8_t __pte2cachemode_tbl[8] = {
[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
[__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
[__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC,
[__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
[__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
[__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};
EXPORT_SYMBOL(__pte2cachemode_tbl);
@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void)
int after_bootmem;
int direct_gbpages
#ifdef CONFIG_DIRECT_GBPAGES
= 1
#endif
;
static void __init init_gbpages(void)
{
#ifdef CONFIG_X86_64
if (direct_gbpages && cpu_has_gbpages)
printk(KERN_INFO "Using GB pages for direct mapping\n");
else
direct_gbpages = 0;
#endif
}
early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
struct map_range {
unsigned long start;
@ -157,16 +147,12 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
init_gbpages();
#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
if (direct_gbpages)
page_size_mask |= 1 << PG_LEVEL_1G;
if (cpu_has_pse)
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
@ -181,6 +167,14 @@ static void __init probe_page_size_mask(void)
__supported_pte_mask |= _PAGE_GLOBAL;
} else
__supported_pte_mask &= ~_PAGE_GLOBAL;
/* Enable 1 GB linear kernel mappings if available: */
if (direct_gbpages && cpu_has_gbpages) {
printk(KERN_INFO "Using GB pages for direct mapping\n");
page_size_mask |= 1 << PG_LEVEL_1G;
} else {
direct_gbpages = 0;
}
}
#ifdef CONFIG_X86_32

View File

@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
return 0;
}
static int __init parse_direct_gbpages_off(char *arg)
{
direct_gbpages = 0;
return 0;
}
early_param("nogbpages", parse_direct_gbpages_off);
static int __init parse_direct_gbpages_on(char *arg)
{
direct_gbpages = 1;
return 0;
}
early_param("gbpages", parse_direct_gbpages_on);
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move

View File

@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
seq_printf(m, "DirectMap4M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 12);
#endif
#ifdef CONFIG_X86_64
if (direct_gbpages)
seq_printf(m, "DirectMap1G: %8lu kB\n",
direct_pages_count[PG_LEVEL_1G] << 20);
#endif
}
#else
static inline void split_page_count(int level) { }
@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages)
{
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
EXPORT_SYMBOL_GPL(set_memory_ro);
int set_memory_rw(unsigned long addr, int numpages)
{
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
EXPORT_SYMBOL_GPL(set_memory_rw);
int set_memory_np(unsigned long addr, int numpages)
{

View File

@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
#ifdef CONFIG_STRICT_DEVMEM
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
current->comm, from, to - 1);
printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;

View File

@ -275,12 +275,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
}
}
/*
* Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
* assumes that pgd should be in one page.
*
* But kernel with PAE paging that is not running as a Xen domain
* only needs to allocate 32 bytes for pgd instead of one page.
*/
#ifdef CONFIG_X86_PAE
#include <linux/slab.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN 32
static struct kmem_cache *pgd_cache;
static int __init pgd_cache_init(void)
{
/*
* When PAE kernel is running as a Xen domain, it does not use
* shared kernel pmd. And this requires a whole page for pgd.
*/
if (!SHARED_KERNEL_PMD)
return 0;
/*
* when PAE kernel is not running as a Xen domain, it uses
* shared kernel pmd. Shared kernel pmd does not require a whole
* page for pgd. We are able to just allocate a 32-byte for pgd.
* During boot time, we create a 32-byte slab for pgd table allocation.
*/
pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
SLAB_PANIC, NULL);
if (!pgd_cache)
return -ENOMEM;
return 0;
}
core_initcall(pgd_cache_init);
static inline pgd_t *_pgd_alloc(void)
{
/*
* If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
* We allocate one page for pgd.
*/
if (!SHARED_KERNEL_PMD)
return (pgd_t *)__get_free_page(PGALLOC_GFP);
/*
* Now PAE kernel is not running as a Xen domain. We can allocate
* a 32-byte slab for pgd to save memory space.
*/
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}
static inline void _pgd_free(pgd_t *pgd)
{
if (!SHARED_KERNEL_PMD)
free_page((unsigned long)pgd);
else
kmem_cache_free(pgd_cache, pgd);
}
#else
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_page(PGALLOC_GFP);
}
static inline void _pgd_free(pgd_t *pgd)
{
free_page((unsigned long)pgd);
}
#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
pgd = _pgd_alloc();
if (pgd == NULL)
goto out;
@ -310,7 +385,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
out_free_pmds:
free_pmds(mm, pmds);
out_free_pgd:
free_page((unsigned long)pgd);
_pgd_free(pgd);
out:
return NULL;
}
@ -320,7 +395,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
paravirt_pgd_free(mm, pgd);
free_page((unsigned long)pgd);
_pgd_free(pgd);
}
/*

View File

@ -67,7 +67,7 @@ void __init efi_bgrt_init(void)
image = efi_lookup_mapped_addr(bgrt_tab->image_address);
if (!image) {
image = early_memremap(bgrt_tab->image_address,
image = early_ioremap(bgrt_tab->image_address,
sizeof(bmp_header));
ioremapped = true;
if (!image) {
@ -89,7 +89,7 @@ void __init efi_bgrt_init(void)
}
if (ioremapped) {
image = early_memremap(bgrt_tab->image_address,
image = early_ioremap(bgrt_tab->image_address,
bmp_header.size);
if (!image) {
pr_err("Ignoring BGRT: failed to map image memory\n");

View File

@ -253,21 +253,41 @@ struct obs_kernel_param {
* obs_kernel_param "array" too far apart in .init.setup.
*/
#define __setup_param(str, unique_id, fn, early) \
static const char __setup_str_##unique_id[] __initconst \
__aligned(1) = str; \
static struct obs_kernel_param __setup_##unique_id \
__used __section(.init.setup) \
__attribute__((aligned((sizeof(long))))) \
static const char __setup_str_##unique_id[] __initconst \
__aligned(1) = str; \
static struct obs_kernel_param __setup_##unique_id \
__used __section(.init.setup) \
__attribute__((aligned((sizeof(long))))) \
= { __setup_str_##unique_id, fn, early }
#define __setup(str, fn) \
#define __setup(str, fn) \
__setup_param(str, fn, fn, 0)
/* NOTE: fn is as per module_param, not __setup! Emits warning if fn
* returns non-zero. */
#define early_param(str, fn) \
/*
* NOTE: fn is as per module_param, not __setup!
* Emits warning if fn returns non-zero.
*/
#define early_param(str, fn) \
__setup_param(str, fn, fn, 1)
#define early_param_on_off(str_on, str_off, var, config) \
\
int var = IS_ENABLED(config); \
\
static int __init parse_##var##_on(char *arg) \
{ \
var = 1; \
return 0; \
} \
__setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \
\
static int __init parse_##var##_off(char *arg) \
{ \
var = 0; \
return 0; \
} \
__setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
/* Relies on boot_command_line being set */
void __init parse_early_param(void);
void __init parse_early_options(char *cmdline);