2008-03-19 08:00:14 +08:00
|
|
|
/*
|
|
|
|
* Handle caching attributes in page tables (PAT)
|
|
|
|
*
|
|
|
|
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
|
|
|
* Suresh B Siddha <suresh.b.siddha@intel.com>
|
|
|
|
*
|
|
|
|
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
|
|
|
|
*/
|
|
|
|
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/bootmem.h>
|
|
|
|
#include <linux/debugfs.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
#include <linux/kernel.h>
|
2016-01-16 08:56:43 +08:00
|
|
|
#include <linux/pfn_t.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <linux/mm.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
#include <linux/fs.h>
|
2009-07-11 00:57:36 +08:00
|
|
|
#include <linux/rbtree.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <asm/cacheflush.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
#include <asm/processor.h>
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <asm/tlbflush.h>
|
2009-11-20 04:23:41 +08:00
|
|
|
#include <asm/x86_init.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/fcntl.h>
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <asm/e820.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
#include <asm/mtrr.h>
|
2008-09-30 19:20:45 +08:00
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/msr.h>
|
|
|
|
#include <asm/pat.h>
|
2008-03-19 08:00:21 +08:00
|
|
|
#include <asm/io.h>
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2010-02-11 03:57:06 +08:00
|
|
|
#include "pat_internal.h"
|
2014-11-03 21:02:03 +08:00
|
|
|
#include "mm_internal.h"
|
2010-02-11 03:57:06 +08:00
|
|
|
|
2015-05-26 16:28:11 +08:00
|
|
|
#undef pr_fmt
|
|
|
|
#define pr_fmt(fmt) "" fmt
|
|
|
|
|
2015-06-05 00:55:09 +08:00
|
|
|
static bool boot_cpu_done;
|
|
|
|
|
2015-05-26 16:28:15 +08:00
|
|
|
static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
|
2016-03-24 05:41:58 +08:00
|
|
|
static void init_cache_modes(void);
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2016-03-24 05:41:58 +08:00
|
|
|
void pat_disable(const char *reason)
|
2008-03-19 08:00:14 +08:00
|
|
|
{
|
2016-03-24 05:41:58 +08:00
|
|
|
if (!__pat_enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (boot_cpu_done) {
|
|
|
|
WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-05-26 16:28:15 +08:00
|
|
|
__pat_enabled = 0;
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_info("x86/PAT: %s\n", reason);
|
2016-03-24 05:41:58 +08:00
|
|
|
|
|
|
|
init_cache_modes();
|
2008-03-19 08:00:14 +08:00
|
|
|
}
|
|
|
|
|
2008-05-29 15:01:28 +08:00
|
|
|
static int __init nopat(char *str)
|
2008-03-19 08:00:14 +08:00
|
|
|
{
|
2008-05-08 15:18:43 +08:00
|
|
|
pat_disable("PAT support disabled.");
|
2008-03-19 08:00:14 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2008-05-08 15:18:43 +08:00
|
|
|
early_param("nopat", nopat);
|
2015-05-26 16:28:15 +08:00
|
|
|
|
|
|
|
bool pat_enabled(void)
|
2009-01-23 08:17:05 +08:00
|
|
|
{
|
2015-05-26 16:28:15 +08:00
|
|
|
return !!__pat_enabled;
|
2009-01-23 08:17:05 +08:00
|
|
|
}
|
2015-05-26 16:28:16 +08:00
|
|
|
EXPORT_SYMBOL_GPL(pat_enabled);
|
2008-05-06 10:09:10 +08:00
|
|
|
|
2010-02-11 03:57:06 +08:00
|
|
|
int pat_debug_enable;
|
2008-09-30 19:20:45 +08:00
|
|
|
|
2008-05-06 10:09:10 +08:00
|
|
|
static int __init pat_debug_setup(char *str)
|
|
|
|
{
|
2010-02-11 03:57:06 +08:00
|
|
|
pat_debug_enable = 1;
|
2008-05-06 10:09:10 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
__setup("debugpat", pat_debug_setup);
|
|
|
|
|
2014-11-17 01:59:19 +08:00
|
|
|
#ifdef CONFIG_X86_PAT
|
|
|
|
/*
|
2015-06-05 00:55:19 +08:00
|
|
|
* X86 PAT uses page flags arch_1 and uncached together to keep track of
|
|
|
|
* memory type of pages that have backing page struct.
|
|
|
|
*
|
|
|
|
* X86 PAT supports 4 different memory types:
|
|
|
|
* - _PAGE_CACHE_MODE_WB
|
|
|
|
* - _PAGE_CACHE_MODE_WC
|
|
|
|
* - _PAGE_CACHE_MODE_UC_MINUS
|
|
|
|
* - _PAGE_CACHE_MODE_WT
|
|
|
|
*
|
|
|
|
* _PAGE_CACHE_MODE_WB is the default type.
|
2014-11-17 01:59:19 +08:00
|
|
|
*/
|
|
|
|
|
2015-06-05 00:55:19 +08:00
|
|
|
/*
 * Memory type encoding in page->flags: two bits (PG_arch_1, PG_uncached)
 * give four combinations, one per supported memory type.
 */
#define _PGMT_WB		0
#define _PGMT_WC		(1UL << PG_arch_1)
#define _PGMT_UC_MINUS		(1UL << PG_uncached)
#define _PGMT_WT		(_PGMT_UC_MINUS | _PGMT_WC)
#define _PGMT_MASK		(_PGMT_UC_MINUS | _PGMT_WC)
#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)
|
|
|
|
|
|
|
|
static inline enum page_cache_mode get_page_memtype(struct page *pg)
|
|
|
|
{
|
|
|
|
unsigned long pg_flags = pg->flags & _PGMT_MASK;
|
|
|
|
|
2015-06-05 00:55:19 +08:00
|
|
|
if (pg_flags == _PGMT_WB)
|
|
|
|
return _PAGE_CACHE_MODE_WB;
|
2014-11-17 01:59:19 +08:00
|
|
|
else if (pg_flags == _PGMT_WC)
|
|
|
|
return _PAGE_CACHE_MODE_WC;
|
|
|
|
else if (pg_flags == _PGMT_UC_MINUS)
|
|
|
|
return _PAGE_CACHE_MODE_UC_MINUS;
|
|
|
|
else
|
2015-06-05 00:55:19 +08:00
|
|
|
return _PAGE_CACHE_MODE_WT;
|
2014-11-17 01:59:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void set_page_memtype(struct page *pg,
|
|
|
|
enum page_cache_mode memtype)
|
|
|
|
{
|
|
|
|
unsigned long memtype_flags;
|
|
|
|
unsigned long old_flags;
|
|
|
|
unsigned long new_flags;
|
|
|
|
|
|
|
|
switch (memtype) {
|
|
|
|
case _PAGE_CACHE_MODE_WC:
|
|
|
|
memtype_flags = _PGMT_WC;
|
|
|
|
break;
|
|
|
|
case _PAGE_CACHE_MODE_UC_MINUS:
|
|
|
|
memtype_flags = _PGMT_UC_MINUS;
|
|
|
|
break;
|
2015-06-05 00:55:19 +08:00
|
|
|
case _PAGE_CACHE_MODE_WT:
|
|
|
|
memtype_flags = _PGMT_WT;
|
2014-11-17 01:59:19 +08:00
|
|
|
break;
|
2015-06-05 00:55:19 +08:00
|
|
|
case _PAGE_CACHE_MODE_WB:
|
2014-11-17 01:59:19 +08:00
|
|
|
default:
|
2015-06-05 00:55:19 +08:00
|
|
|
memtype_flags = _PGMT_WB;
|
2014-11-17 01:59:19 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
old_flags = pg->flags;
|
|
|
|
new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
|
|
|
|
} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* Stub for builds without CONFIG_X86_PAT: no per-page type, returns -1. */
static inline enum page_cache_mode get_page_memtype(struct page *pg)
{
	return -1;
}
|
|
|
|
static inline void set_page_memtype(struct page *pg,
|
|
|
|
enum page_cache_mode memtype)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-03-19 08:00:14 +08:00
|
|
|
/* Memory type encodings used in the entries of the PAT MSR. */
enum {
	PAT_UC		= 0,	/* uncached */
	PAT_WC		= 1,	/* Write combining */
	PAT_WT		= 4,	/* Write Through */
	PAT_WP		= 5,	/* Write Protected */
	PAT_WB		= 6,	/* Write Back (default) */
	PAT_UC_MINUS	= 7,	/* UC, but can be overridden by MTRR */
};
|
|
|
|
|
2014-11-03 21:02:03 +08:00
|
|
|
#define CM(c) (_PAGE_CACHE_MODE_ ## c)
|
|
|
|
|
|
|
|
static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
|
|
|
|
{
|
|
|
|
enum page_cache_mode cache;
|
|
|
|
char *cache_mode;
|
|
|
|
|
|
|
|
switch (pat_val) {
|
|
|
|
case PAT_UC: cache = CM(UC); cache_mode = "UC "; break;
|
|
|
|
case PAT_WC: cache = CM(WC); cache_mode = "WC "; break;
|
|
|
|
case PAT_WT: cache = CM(WT); cache_mode = "WT "; break;
|
|
|
|
case PAT_WP: cache = CM(WP); cache_mode = "WP "; break;
|
|
|
|
case PAT_WB: cache = CM(WB); cache_mode = "WB "; break;
|
|
|
|
case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
|
|
|
|
default: cache = CM(WB); cache_mode = "WB "; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(msg, cache_mode, 4);
|
|
|
|
|
|
|
|
return cache;
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef CM
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the cache mode to pgprot translation tables according to PAT
|
|
|
|
* configuration.
|
|
|
|
* Using lower indices is preferred, so we start with highest index.
|
|
|
|
*/
|
2016-03-24 05:42:02 +08:00
|
|
|
static void __init_cache_modes(u64 pat)
|
2014-11-03 21:02:03 +08:00
|
|
|
{
|
|
|
|
enum page_cache_mode cache;
|
|
|
|
char pat_msg[33];
|
2015-06-05 00:55:10 +08:00
|
|
|
int i;
|
2014-11-03 21:02:03 +08:00
|
|
|
|
|
|
|
pat_msg[32] = 0;
|
|
|
|
for (i = 7; i >= 0; i--) {
|
|
|
|
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
|
|
|
|
pat_msg + 4 * i);
|
|
|
|
update_cache_mode_entry(i, cache);
|
|
|
|
}
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
|
2014-11-03 21:02:03 +08:00
|
|
|
}
|
|
|
|
|
2008-06-10 22:05:39 +08:00
|
|
|
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2015-06-05 00:55:09 +08:00
|
|
|
/*
 * Boot CPU part of pat_init(): program the PAT MSR with @pat and set up
 * the cache mode tables to match.
 *
 * PAT is disabled instead (and nothing is written) when the CPU lacks the
 * PAT feature or when the MSR currently reads back as 0.
 */
static void pat_bsp_init(u64 pat)
{
	u64 current_pat;

	if (!boot_cpu_has(X86_FEATURE_PAT)) {
		pat_disable("PAT not supported by CPU.");
		return;
	}

	rdmsrl(MSR_IA32_CR_PAT, current_pat);
	if (current_pat == 0) {
		pat_disable("PAT MSR is 0, disabled.");
		return;
	}

	wrmsrl(MSR_IA32_CR_PAT, pat);

	__init_cache_modes(pat);
}
|
|
|
|
|
|
|
|
/*
 * Secondary CPU part of pat_init(): write @pat into the PAT MSR.
 *
 * Reaching this point means the boot CPU already switched to PAT, and
 * there is no way to undo that, so a missing PAT feature is fatal.
 */
static void pat_ap_init(u64 pat)
{
	if (!boot_cpu_has(X86_FEATURE_PAT))
		panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");

	wrmsrl(MSR_IA32_CR_PAT, pat);
}
|
|
|
|
|
2016-03-24 05:41:57 +08:00
|
|
|
static void init_cache_modes(void)
|
2015-06-05 00:55:09 +08:00
|
|
|
{
|
2016-03-24 05:41:57 +08:00
|
|
|
u64 pat = 0;
|
|
|
|
static int init_cm_done;
|
2015-06-05 00:55:09 +08:00
|
|
|
|
2016-03-24 05:41:57 +08:00
|
|
|
if (init_cm_done)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (boot_cpu_has(X86_FEATURE_PAT)) {
|
|
|
|
/*
|
|
|
|
* CPU supports PAT. Set PAT table to be consistent with
|
|
|
|
* PAT MSR. This case supports "nopat" boot option, and
|
|
|
|
* virtual machine environments which support PAT without
|
|
|
|
* MTRRs. In specific, Xen has unique setup to PAT MSR.
|
|
|
|
*
|
|
|
|
* If PAT MSR returns 0, it is considered invalid and emulates
|
|
|
|
* as No PAT.
|
|
|
|
*/
|
|
|
|
rdmsrl(MSR_IA32_CR_PAT, pat);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!pat) {
|
2015-06-05 00:55:10 +08:00
|
|
|
/*
|
|
|
|
* No PAT. Emulate the PAT table that corresponds to the two
|
2016-03-24 05:41:57 +08:00
|
|
|
* cache bits, PWT (Write Through) and PCD (Cache Disable).
|
|
|
|
* This setup is also the same as the BIOS default setup.
|
2015-06-05 00:55:10 +08:00
|
|
|
*
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
* PTE encoding:
|
2015-06-05 00:55:10 +08:00
|
|
|
*
|
|
|
|
* PCD
|
|
|
|
* |PWT PAT
|
|
|
|
* || slot
|
|
|
|
* 00 0 WB : _PAGE_CACHE_MODE_WB
|
|
|
|
* 01 1 WT : _PAGE_CACHE_MODE_WT
|
|
|
|
* 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
|
|
|
|
* 11 3 UC : _PAGE_CACHE_MODE_UC
|
|
|
|
*
|
|
|
|
* NOTE: When WC or WP is used, it is redirected to UC- per
|
|
|
|
* the default setup in __cachemode2pte_tbl[].
|
|
|
|
*/
|
|
|
|
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
|
|
|
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
|
2016-03-24 05:41:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
__init_cache_modes(pat);
|
|
|
|
|
|
|
|
init_cm_done = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pat_init - Initialize PAT MSR and PAT table
|
|
|
|
*
|
|
|
|
* This function initializes PAT MSR and PAT table with an OS-defined value
|
|
|
|
* to enable additional cache attributes, WC and WT.
|
|
|
|
*
|
|
|
|
* This function must be called on all CPUs using the specific sequence of
|
|
|
|
* operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
|
|
|
|
* procedure for PAT.
|
|
|
|
*/
|
|
|
|
void pat_init(void)
|
|
|
|
{
|
|
|
|
u64 pat;
|
|
|
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
|
|
|
|
|
|
if (!pat_enabled()) {
|
|
|
|
init_cache_modes();
|
|
|
|
return;
|
|
|
|
}
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
|
2016-03-24 05:41:57 +08:00
|
|
|
if ((c->x86_vendor == X86_VENDOR_INTEL) &&
|
|
|
|
(((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
|
|
|
|
((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
|
2015-06-05 00:55:10 +08:00
|
|
|
/*
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
* PAT support with the lower four entries. Intel Pentium 2,
|
|
|
|
* 3, M, and 4 are affected by PAT errata, which makes the
|
|
|
|
* upper four entries unusable. To be on the safe side, we don't
|
|
|
|
* use those.
|
|
|
|
*
|
|
|
|
* PTE encoding:
|
2015-06-05 00:55:10 +08:00
|
|
|
* PAT
|
|
|
|
* |PCD
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
* ||PWT PAT
|
|
|
|
* ||| slot
|
|
|
|
* 000 0 WB : _PAGE_CACHE_MODE_WB
|
|
|
|
* 001 1 WC : _PAGE_CACHE_MODE_WC
|
|
|
|
* 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
|
|
|
|
* 011 3 UC : _PAGE_CACHE_MODE_UC
|
2015-06-05 00:55:10 +08:00
|
|
|
* PAT bit unused
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
*
|
|
|
|
* NOTE: When WT or WP is used, it is redirected to UC- per
|
|
|
|
* the default setup in __cachemode2pte_tbl[].
|
2015-06-05 00:55:10 +08:00
|
|
|
*/
|
|
|
|
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
|
|
|
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
|
x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type
Assign Write-Through type to the PA7 slot in the PAT MSR when
the processor is not affected by PAT errata. The PA7 slot is
chosen to improve robustness in the presence of errata that
might cause the high PAT bit to be ignored. This way a buggy PA7
slot access will hit the PA3 slot, which is UC, so at worst we
lose performance without causing a correctness issue.
The following Intel processors are affected by the PAT errata.
Errata CPUID
----------------------------------------------------
Pentium 2, A52 family 0x6, model 0x5
Pentium 3, E27 family 0x6, model 0x7, 0x8
Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa
Pentium M, Y26 family 0x6, model 0x9
Pentium M 90nm, X9 family 0x6, model 0xd
Pentium 4, N46 family 0xf, model 0x0
Instead of making sharp boundary checks, we remain conservative
and exclude all Pentium 2, 3, M and 4 family processors. For
those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default
setup in __cachemode2pte_tbl[].
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: jgross@suse.com
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-06-05 00:55:12 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Full PAT support. We put WT in slot 7 to improve
|
|
|
|
* robustness in the presence of errata that might cause
|
|
|
|
* the high PAT bit to be ignored. This way, a buggy slot 7
|
|
|
|
* access will hit slot 3, and slot 3 is UC, so at worst
|
|
|
|
* we lose performance without causing a correctness issue.
|
|
|
|
* Pentium 4 erratum N46 is an example for such an erratum,
|
|
|
|
* although we try not to use PAT at all on affected CPUs.
|
|
|
|
*
|
|
|
|
* PTE encoding:
|
|
|
|
* PAT
|
|
|
|
* |PCD
|
|
|
|
* ||PWT PAT
|
|
|
|
* ||| slot
|
|
|
|
* 000 0 WB : _PAGE_CACHE_MODE_WB
|
|
|
|
* 001 1 WC : _PAGE_CACHE_MODE_WC
|
|
|
|
* 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
|
|
|
|
* 011 3 UC : _PAGE_CACHE_MODE_UC
|
|
|
|
* 100 4 WB : Reserved
|
|
|
|
* 101 5 WC : Reserved
|
|
|
|
* 110 6 UC-: Reserved
|
|
|
|
* 111 7 WT : _PAGE_CACHE_MODE_WT
|
|
|
|
*
|
|
|
|
* The reserved slots are unused, but mapped to their
|
|
|
|
* corresponding types in the presence of PAT errata.
|
|
|
|
*/
|
|
|
|
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
|
|
|
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
|
2015-06-05 00:55:10 +08:00
|
|
|
}
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2015-06-05 00:55:09 +08:00
|
|
|
if (!boot_cpu_done) {
|
|
|
|
pat_bsp_init(pat);
|
|
|
|
boot_cpu_done = true;
|
|
|
|
} else {
|
|
|
|
pat_ap_init(pat);
|
2015-01-12 13:15:45 +08:00
|
|
|
}
|
2008-03-19 08:00:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#undef PAT
|
|
|
|
|
2010-02-11 07:26:07 +08:00
|
|
|
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
|
2009-07-11 00:57:36 +08:00
|
|
|
|
2008-03-19 08:00:14 +08:00
|
|
|
/*
|
|
|
|
* Does intersection of PAT memory type and MTRR memory type and returns
|
|
|
|
* the resulting memory type as PAT understands it.
|
|
|
|
* (Type in pat and mtrr will not have same value)
|
|
|
|
* The intersection is based on "Effective Memory Type" tables in IA-32
|
|
|
|
* SDM vol 3a
|
|
|
|
*/
|
2014-11-03 21:01:59 +08:00
|
|
|
static unsigned long pat_x_mtrr_type(u64 start, u64 end,
|
|
|
|
enum page_cache_mode req_type)
|
2008-03-19 08:00:14 +08:00
|
|
|
{
|
x86: fix Xorg crash with xf86MapVidMem error
Clarify the usage of mtrr_lookup() in PAT code, and to make PAT code
resilient to mtrr lookup problems.
Specifically, pat_x_mtrr_type() is restructured to highlight, under what
conditions we look for mtrr hint. pat_x_mtrr_type() uses a default type
when there are any errors in mtrr lookup (still maintaining the pat
consistency). And, reserve_memtype() highlights its usage ot mtrr_lookup
for request type of '-1' and also defaults in a sane way on any mtrr
lookup failure.
pat.c looks at mtrr type of a range to get a hint on what mapping type
to request when user/API: (1) hasn't specified any type (/dev/mem
mapping) and we do not want to take performance hit by always mapping
UC_MINUS. This will be the case for /dev/mem mappings used to map BIOS
area or ACPI region which are WB'able. In this case, as long as MTRR is
not WB, PAT will request UC_MINUS for such mappings.
(2) user/API requests WB mapping while in reality MTRR may have UC or
WC. In this case, PAT can map as WB (without checking MTRR) and still
effective type will be UC or WC. But, a subsequent request to map same
region as UC or WC may fail, as the region will get trackked as WB in
PAT list. Looking at MTRR hint helps us to track based on effective type
rather than what user requested. Again, here mtrr_lookup is only used as
hint and we fallback to WB mapping (as requested by user) as default.
In both cases, after using the mtrr hint, we still go through the
memtype list to make sure there are no inconsistencies among multiple
users.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Rufus & Azrael <rufus-azrael@numericable.fr>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-05-30 03:01:44 +08:00
|
|
|
/*
|
|
|
|
* Look for MTRR hint to get the effective type in case where PAT
|
|
|
|
* request is for WB.
|
|
|
|
*/
|
2014-11-03 21:01:59 +08:00
|
|
|
if (req_type == _PAGE_CACHE_MODE_WB) {
|
2015-05-26 16:28:10 +08:00
|
|
|
u8 mtrr_type, uniform;
|
2008-06-18 21:38:57 +08:00
|
|
|
|
2015-05-26 16:28:10 +08:00
|
|
|
mtrr_type = mtrr_type_lookup(start, end, &uniform);
|
2009-04-10 05:26:51 +08:00
|
|
|
if (mtrr_type != MTRR_TYPE_WRBACK)
|
2014-11-03 21:01:59 +08:00
|
|
|
return _PAGE_CACHE_MODE_UC_MINUS;
|
2009-04-10 05:26:51 +08:00
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
return _PAGE_CACHE_MODE_WB;
|
2008-06-18 21:38:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return req_type;
|
2008-03-19 08:00:14 +08:00
|
|
|
}
|
|
|
|
|
2012-05-26 05:12:46 +08:00
|
|
|
/*
 * Accumulator passed to pagerange_is_ram_callback() through its opaque
 * 'arg' pointer. Field order matters: users initialize it positionally.
 */
struct pagerange_state {
	unsigned long		cur_pfn;	/* pfn just past the last chunk reported */
	int			ram;		/* set once a non-empty chunk was reported */
	int			not_ram;	/* set once a chunk started beyond cur_pfn */
};

/*
 * Record one chunk [initial_pfn, initial_pfn + total_nr_pages) in the
 * pagerange_state pointed to by @arg.
 *
 * A chunk that starts beyond the current cursor means a hole was skipped,
 * so not_ram is latched; a non-empty chunk latches ram. The cursor is then
 * moved to the end of the chunk.
 *
 * Returns non-zero as soon as both flags are set (the range is mixed),
 * zero otherwise.
 */
static int
pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
{
	struct pagerange_state *state = arg;

	state->not_ram	|= initial_pfn > state->cur_pfn;
	state->ram	|= total_nr_pages > 0;
	state->cur_pfn	 = initial_pfn + total_nr_pages;

	return state->ram && state->not_ram;
}
|
|
|
|
|
2010-07-22 13:57:35 +08:00
|
|
|
/*
 * Classify the physical range [start, end) for memtype tracking.
 *
 * Returns:
 *    1 - the walk reported RAM and the callback never saw a hole
 *    0 - no RAM chunk was reported (this includes ranges lying entirely
 *        below ISA_END_ADDRESS, which are never walked)
 *   -1 - the walk returned a positive value, i.e. the callback saw both
 *        RAM and a hole (mixed range)
 *
 * Note that the state cursor starts at the caller's start pfn, before it is
 * clamped to the end of the ISA region, so a range that begins in the ISA
 * region and continues into RAM is reported as mixed.
 */
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
	int ret = 0;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct pagerange_state state = {start_pfn, 0, 0};

	/*
	 * For legacy reasons, physical address range in the legacy ISA
	 * region is tracked as non-RAM. This will allow users of
	 * /dev/mem to map portions of legacy ISA region, even when
	 * some of those portions are listed(or not even listed) with
	 * different e820 types(RAM/reserved/..)
	 */
	if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
		start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;

	if (start_pfn < end_pfn) {
		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
				&state, pagerange_is_ram_callback);
	}

	return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}
|
|
|
|
|
2008-09-24 23:53:33 +08:00
|
|
|
/*
|
2009-07-11 00:57:38 +08:00
|
|
|
* For RAM pages, we use page flags to mark the pages with appropriate type.
|
2015-06-05 00:55:19 +08:00
|
|
|
* The page flags are limited to four types, WB (default), WC, WT and UC-.
|
|
|
|
* WP request fails with -EINVAL, and UC gets redirected to UC-. Setting
|
|
|
|
* a new memory type is only allowed for a page mapped with the default WB
|
|
|
|
* type.
|
2015-06-05 00:55:13 +08:00
|
|
|
*
|
|
|
|
* Here we do two passes:
|
|
|
|
* - Find the memtype of all the pages in the range, look for any conflicts.
|
|
|
|
* - In case of no conflicts, set the new memtype for pages in the range.
|
2008-09-24 23:53:33 +08:00
|
|
|
*/
|
2014-11-03 21:01:59 +08:00
|
|
|
static int reserve_ram_pages_type(u64 start, u64 end,
|
|
|
|
enum page_cache_mode req_type,
|
|
|
|
enum page_cache_mode *new_type)
|
2008-09-24 23:53:33 +08:00
|
|
|
{
|
|
|
|
struct page *page;
|
2009-07-11 00:57:38 +08:00
|
|
|
u64 pfn;
|
|
|
|
|
2015-06-05 00:55:19 +08:00
|
|
|
if (req_type == _PAGE_CACHE_MODE_WP) {
|
2015-06-05 00:55:13 +08:00
|
|
|
if (new_type)
|
|
|
|
*new_type = _PAGE_CACHE_MODE_UC_MINUS;
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
if (req_type == _PAGE_CACHE_MODE_UC) {
|
2009-07-11 00:57:38 +08:00
|
|
|
/* We do not support strong UC */
|
|
|
|
WARN_ON_ONCE(1);
|
2014-11-03 21:01:59 +08:00
|
|
|
req_type = _PAGE_CACHE_MODE_UC_MINUS;
|
2009-07-11 00:57:38 +08:00
|
|
|
}
|
2008-09-24 23:53:33 +08:00
|
|
|
|
|
|
|
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
|
2014-11-03 21:01:59 +08:00
|
|
|
enum page_cache_mode type;
|
2008-09-24 23:53:33 +08:00
|
|
|
|
2009-07-11 00:57:38 +08:00
|
|
|
page = pfn_to_page(pfn);
|
|
|
|
type = get_page_memtype(page);
|
2015-06-05 00:55:19 +08:00
|
|
|
if (type != _PAGE_CACHE_MODE_WB) {
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
|
2012-05-30 06:06:29 +08:00
|
|
|
start, end - 1, type, req_type);
|
2009-07-11 00:57:38 +08:00
|
|
|
if (new_type)
|
|
|
|
*new_type = type;
|
|
|
|
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
2008-09-24 23:53:33 +08:00
|
|
|
}
|
|
|
|
|
2009-07-11 00:57:38 +08:00
|
|
|
if (new_type)
|
|
|
|
*new_type = req_type;
|
|
|
|
|
|
|
|
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
|
2008-09-24 23:53:33 +08:00
|
|
|
page = pfn_to_page(pfn);
|
2009-07-11 00:57:38 +08:00
|
|
|
set_page_memtype(page, req_type);
|
2008-09-24 23:53:33 +08:00
|
|
|
}
|
2009-07-11 00:57:38 +08:00
|
|
|
return 0;
|
2008-09-24 23:53:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Reset the page-flag memtype of every page in [start, end) back to the
 * default WB type. Always returns 0.
 */
static int free_ram_pages_type(u64 start, u64 end)
{
	struct page *page;
	u64 pfn;

	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
		page = pfn_to_page(pfn);
		set_page_memtype(page, _PAGE_CACHE_MODE_WB);
	}
	return 0;
}
|
|
|
|
|
2008-03-19 08:00:21 +08:00
|
|
|
/*
|
|
|
|
* req_type typically has one of the:
|
2014-11-03 21:01:59 +08:00
|
|
|
* - _PAGE_CACHE_MODE_WB
|
|
|
|
* - _PAGE_CACHE_MODE_WC
|
|
|
|
* - _PAGE_CACHE_MODE_UC_MINUS
|
|
|
|
* - _PAGE_CACHE_MODE_UC
|
2015-06-05 00:55:13 +08:00
|
|
|
* - _PAGE_CACHE_MODE_WT
|
2008-03-19 08:00:21 +08:00
|
|
|
*
|
2008-06-21 04:01:49 +08:00
|
|
|
* If new_type is NULL, function will return an error if it cannot reserve the
|
|
|
|
* region with req_type. If new_type is non-NULL, function will return
|
|
|
|
* available type in new_type in case of no error. In case of any error
|
2008-03-19 08:00:21 +08:00
|
|
|
* it will return a negative return value.
|
|
|
|
*/
|
2014-11-03 21:01:59 +08:00
|
|
|
int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
|
|
|
|
enum page_cache_mode *new_type)
|
2008-03-19 08:00:14 +08:00
|
|
|
{
|
2010-02-11 03:57:06 +08:00
|
|
|
struct memtype *new;
|
2014-11-03 21:01:59 +08:00
|
|
|
enum page_cache_mode actual_type;
|
2008-09-24 23:53:33 +08:00
|
|
|
int is_range_ram;
|
2008-09-30 19:20:45 +08:00
|
|
|
int err = 0;
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2008-09-30 19:20:45 +08:00
|
|
|
BUG_ON(start >= end); /* end is exclusive */
|
2008-06-21 04:03:06 +08:00
|
|
|
|
2015-05-26 16:28:15 +08:00
|
|
|
if (!pat_enabled()) {
|
2008-03-19 08:00:21 +08:00
|
|
|
/* This is identical to page table setting without PAT */
|
2015-06-05 00:55:11 +08:00
|
|
|
if (new_type)
|
|
|
|
*new_type = req_type;
|
2008-03-19 08:00:14 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Low ISA region is always mapped WB in page table. No need to track */
|
2009-11-24 06:49:20 +08:00
|
|
|
if (x86_platform.is_untracked_pat_range(start, end)) {
|
2008-06-21 04:01:49 +08:00
|
|
|
if (new_type)
|
2014-11-03 21:01:59 +08:00
|
|
|
*new_type = _PAGE_CACHE_MODE_WB;
|
2008-03-19 08:00:14 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-04-10 05:26:51 +08:00
|
|
|
/*
|
|
|
|
* Call mtrr_lookup to get the type hint. This is an
|
|
|
|
* optimization for /dev/mem mmap'ers into WB memory (BIOS
|
|
|
|
* tools and ACPI tools). Use WB request for WB memory and use
|
|
|
|
* UC_MINUS otherwise.
|
|
|
|
*/
|
2014-11-03 21:01:59 +08:00
|
|
|
actual_type = pat_x_mtrr_type(start, end, req_type);
|
2008-03-19 08:00:14 +08:00
|
|
|
|
2009-01-14 02:21:30 +08:00
|
|
|
if (new_type)
|
|
|
|
*new_type = actual_type;
|
|
|
|
|
2009-02-12 03:20:23 +08:00
|
|
|
is_range_ram = pat_pagerange_is_ram(start, end);
|
2009-07-11 00:57:38 +08:00
|
|
|
if (is_range_ram == 1) {
|
|
|
|
|
|
|
|
err = reserve_ram_pages_type(start, end, req_type, new_type);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
} else if (is_range_ram < 0) {
|
2008-09-24 23:53:33 +08:00
|
|
|
return -EINVAL;
|
2009-07-11 00:57:38 +08:00
|
|
|
}
|
2008-09-24 23:53:33 +08:00
|
|
|
|
2010-06-11 08:45:01 +08:00
|
|
|
new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
|
2008-06-21 04:01:49 +08:00
|
|
|
if (!new)
|
2008-03-19 08:00:14 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2008-09-30 19:20:45 +08:00
|
|
|
new->start = start;
|
|
|
|
new->end = end;
|
|
|
|
new->type = actual_type;
|
2008-03-19 08:00:14 +08:00
|
|
|
|
|
|
|
spin_lock(&memtype_lock);
|
|
|
|
|
2010-02-11 07:26:07 +08:00
|
|
|
err = rbt_memtype_check_insert(new, new_type);
|
2008-03-19 08:00:14 +08:00
|
|
|
if (err) {
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
|
|
|
|
start, end - 1,
|
|
|
|
cattr_name(new->type), cattr_name(req_type));
|
2008-06-21 04:01:49 +08:00
|
|
|
kfree(new);
|
2008-03-19 08:00:14 +08:00
|
|
|
spin_unlock(&memtype_lock);
|
2008-09-30 19:20:45 +08:00
|
|
|
|
2008-03-19 08:00:14 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock(&memtype_lock);
|
2008-06-21 04:04:02 +08:00
|
|
|
|
2012-05-30 06:06:29 +08:00
|
|
|
dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
|
|
|
|
start, end - 1, cattr_name(new->type), cattr_name(req_type),
|
2008-06-21 04:04:02 +08:00
|
|
|
new_type ? cattr_name(*new_type) : "-");
|
|
|
|
|
2008-03-19 08:00:14 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release the memtype reservation for [start, end).
 *
 * Mirrors the case split in reserve_memtype(): nothing to do when PAT is
 * disabled or the range is untracked, page flags are reset for an all-RAM
 * range, a mixed range is rejected, and otherwise the tracked entry is
 * erased under memtype_lock and freed.
 *
 * Returns 0 on success, -EINVAL for a mixed range or when the erase
 * returns an error pointer.
 */
int free_memtype(u64 start, u64 end)
{
	int err = -EINVAL;
	int is_range_ram;
	struct memtype *entry;

	if (!pat_enabled())
		return 0;

	/* Low ISA region is always mapped WB. No need to track */
	if (x86_platform.is_untracked_pat_range(start, end))
		return 0;

	is_range_ram = pat_pagerange_is_ram(start, end);
	if (is_range_ram == 1) {

		err = free_ram_pages_type(start, end);

		return err;
	} else if (is_range_ram < 0) {
		return -EINVAL;
	}

	spin_lock(&memtype_lock);
	entry = rbt_memtype_erase(start, end);
	spin_unlock(&memtype_lock);

	if (IS_ERR(entry)) {
		pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
			current->comm, current->pid, start, end - 1);
		return -EINVAL;
	}

	kfree(entry);

	dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);

	return 0;
}
|
|
|
|
|
2008-03-19 08:00:20 +08:00
|
|
|
|
2009-07-11 00:57:39 +08:00
|
|
|
/**
|
|
|
|
* lookup_memtype - Looksup the memory type for a physical address
|
|
|
|
* @paddr: physical address of which memory type needs to be looked up
|
|
|
|
*
|
|
|
|
* Only to be called when PAT is enabled
|
|
|
|
*
|
2014-11-03 21:01:55 +08:00
|
|
|
* Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
|
2015-06-05 00:55:19 +08:00
|
|
|
* or _PAGE_CACHE_MODE_WT.
|
2009-07-11 00:57:39 +08:00
|
|
|
*/
|
2014-11-03 21:01:55 +08:00
|
|
|
static enum page_cache_mode lookup_memtype(u64 paddr)
|
2009-07-11 00:57:39 +08:00
|
|
|
{
|
2014-11-03 21:01:55 +08:00
|
|
|
enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB;
|
2009-07-11 00:57:39 +08:00
|
|
|
struct memtype *entry;
|
|
|
|
|
2009-11-24 06:49:20 +08:00
|
|
|
if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
|
2009-07-11 00:57:39 +08:00
|
|
|
return rettype;
|
|
|
|
|
|
|
|
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
|
|
|
|
struct page *page;
|
|
|
|
|
2015-06-05 00:55:19 +08:00
|
|
|
page = pfn_to_page(paddr >> PAGE_SHIFT);
|
|
|
|
return get_page_memtype(page);
|
2009-07-11 00:57:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock(&memtype_lock);
|
|
|
|
|
2010-02-11 07:26:07 +08:00
|
|
|
entry = rbt_memtype_lookup(paddr);
|
2009-07-11 00:57:39 +08:00
|
|
|
if (entry != NULL)
|
|
|
|
rettype = entry->type;
|
|
|
|
else
|
2014-11-03 21:01:55 +08:00
|
|
|
rettype = _PAGE_CACHE_MODE_UC_MINUS;
|
2009-07-11 00:57:39 +08:00
|
|
|
|
|
|
|
spin_unlock(&memtype_lock);
|
|
|
|
return rettype;
|
|
|
|
}
|
|
|
|
|
2009-07-11 00:57:34 +08:00
|
|
|
/**
|
|
|
|
* io_reserve_memtype - Request a memory type mapping for a region of memory
|
|
|
|
* @start: start (physical address) of the region
|
|
|
|
* @end: end (physical address) of the region
|
|
|
|
* @type: A pointer to memtype, with requested type. On success, requested
|
|
|
|
* or any other compatible type that was available for the region is returned
|
|
|
|
*
|
|
|
|
* On success, returns 0
|
|
|
|
* On failure, returns non-zero
|
|
|
|
*/
|
|
|
|
int io_reserve_memtype(resource_size_t start, resource_size_t end,
|
2014-11-03 21:01:54 +08:00
|
|
|
enum page_cache_mode *type)
|
2009-07-11 00:57:34 +08:00
|
|
|
{
|
2009-08-27 08:17:51 +08:00
|
|
|
resource_size_t size = end - start;
|
2014-11-03 21:01:54 +08:00
|
|
|
enum page_cache_mode req_type = *type;
|
|
|
|
enum page_cache_mode new_type;
|
2009-07-11 00:57:34 +08:00
|
|
|
int ret;
|
|
|
|
|
2009-08-27 08:17:51 +08:00
|
|
|
WARN_ON_ONCE(iomem_map_sanity_check(start, size));
|
2009-07-11 00:57:34 +08:00
|
|
|
|
|
|
|
ret = reserve_memtype(start, end, req_type, &new_type);
|
|
|
|
if (ret)
|
|
|
|
goto out_err;
|
|
|
|
|
2009-08-27 08:17:51 +08:00
|
|
|
if (!is_new_memtype_allowed(start, size, req_type, new_type))
|
2009-07-11 00:57:34 +08:00
|
|
|
goto out_free;
|
|
|
|
|
2009-08-27 08:17:51 +08:00
|
|
|
if (kernel_map_sync_memtype(start, size, new_type) < 0)
|
2009-07-11 00:57:34 +08:00
|
|
|
goto out_free;
|
|
|
|
|
|
|
|
*type = new_type;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_free:
|
|
|
|
free_memtype(start, end);
|
|
|
|
ret = -EBUSY;
|
|
|
|
out_err:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* io_free_memtype - Release a memory type mapping for a region of memory
|
|
|
|
* @start: start (physical address) of the region
|
|
|
|
* @end: end (physical address) of the region
|
|
|
|
*/
|
|
|
|
void io_free_memtype(resource_size_t start, resource_size_t end)
|
|
|
|
{
|
|
|
|
free_memtype(start, end);
|
|
|
|
}
|
|
|
|
|
2016-10-24 13:27:59 +08:00
|
|
|
/*
 * Reserve [start, start + size) with a write-combining request.
 *
 * Returns whatever io_reserve_memtype() returns; the type that was actually
 * granted is not reported back to the caller.
 */
int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
{
	enum page_cache_mode type = _PAGE_CACHE_MODE_WC;

	return io_reserve_memtype(start, start + size, &type);
}
EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
|
|
|
|
|
|
|
|
/* Release the reservation for [start, start + size). */
void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
{
	io_free_memtype(start, start + size);
}
EXPORT_SYMBOL(arch_io_free_memtype_wc);
|
|
|
|
|
2008-03-19 08:00:20 +08:00
|
|
|
/*
 * Returns @vma_prot unchanged; @file, @pfn and @size are not consulted.
 */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				unsigned long size, pgprot_t vma_prot)
{
	return vma_prot;
}
|
|
|
|
|
2008-07-18 06:26:59 +08:00
|
|
|
#ifdef CONFIG_STRICT_DEVMEM
|
2014-12-29 00:15:24 +08:00
|
|
|
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
|
2008-04-27 02:32:12 +08:00
|
|
|
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#else
|
2008-10-31 04:59:21 +08:00
|
|
|
/* This check is needed to avoid cache aliasing when PAT is enabled */
|
2008-04-27 02:32:12 +08:00
|
|
|
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
|
|
|
|
{
|
|
|
|
u64 from = ((u64)pfn) << PAGE_SHIFT;
|
|
|
|
u64 to = from + size;
|
|
|
|
u64 cursor = from;
|
|
|
|
|
2015-05-26 16:28:15 +08:00
|
|
|
if (!pat_enabled())
|
2008-10-31 04:59:21 +08:00
|
|
|
return 1;
|
|
|
|
|
2008-04-27 02:32:12 +08:00
|
|
|
while (cursor < to) {
|
2016-07-08 17:38:28 +08:00
|
|
|
if (!devmem_is_allowed(pfn))
|
2008-04-27 02:32:12 +08:00
|
|
|
return 0;
|
|
|
|
cursor += PAGE_SIZE;
|
|
|
|
pfn++;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
2008-07-18 06:26:59 +08:00
|
|
|
#endif /* CONFIG_STRICT_DEVMEM */
|
2008-04-27 02:32:12 +08:00
|
|
|
|
2008-03-19 08:00:20 +08:00
|
|
|
/*
 * Decide whether the pfn range may be mapped and pick its cache mode.
 *
 * Returns 0 when range_is_allowed() rejects the range (@vma_prot is left
 * untouched). Otherwise the cache bits of @vma_prot are replaced with WB,
 * or with UC_MINUS when the file was opened with O_DSYNC, and 1 is returned.
 */
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
				unsigned long size, pgprot_t *vma_prot)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;

	if (!range_is_allowed(pfn, size))
		return 0;

	if (file->f_flags & O_DSYNC)
		pcm = _PAGE_CACHE_MODE_UC_MINUS;

	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
			     cachemode2protval(pcm));
	return 1;
}
|
2008-03-19 08:00:21 +08:00
|
|
|
|
2009-02-25 09:35:13 +08:00
|
|
|
/*
|
|
|
|
* Change the memory type for the physial address range in kernel identity
|
|
|
|
* mapping space if that range is a part of identity map.
|
|
|
|
*/
|
2014-11-03 21:01:58 +08:00
|
|
|
int kernel_map_sync_memtype(u64 base, unsigned long size,
|
|
|
|
enum page_cache_mode pcm)
|
2009-02-25 09:35:13 +08:00
|
|
|
{
|
|
|
|
unsigned long id_sz;
|
|
|
|
|
2013-01-23 05:24:30 +08:00
|
|
|
if (base > __pa(high_memory-1))
|
2009-02-25 09:35:13 +08:00
|
|
|
return 0;
|
|
|
|
|
2013-03-08 00:31:51 +08:00
|
|
|
/*
|
|
|
|
* some areas in the middle of the kernel identity range
|
|
|
|
* are not mapped, like the PCI space.
|
|
|
|
*/
|
|
|
|
if (!page_is_ram(base >> PAGE_SHIFT))
|
|
|
|
return 0;
|
|
|
|
|
2013-01-23 05:24:30 +08:00
|
|
|
id_sz = (__pa(high_memory-1) <= base + size) ?
|
2009-02-25 09:35:13 +08:00
|
|
|
__pa(high_memory) - base :
|
|
|
|
size;
|
|
|
|
|
2014-11-03 21:01:58 +08:00
|
|
|
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
|
2009-02-25 09:35:13 +08:00
|
|
|
current->comm, current->pid,
|
2014-11-03 21:01:59 +08:00
|
|
|
cattr_name(pcm),
|
2012-05-30 06:06:29 +08:00
|
|
|
base, (unsigned long long)(base + size-1));
|
2009-02-25 09:35:13 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-12-19 03:41:30 +08:00
|
|
|
/*
|
|
|
|
* Internal interface to reserve a range of physical memory with prot.
|
|
|
|
* Reserved non RAM regions only and after successful reserve_memtype,
|
|
|
|
* this func also keeps identity mapping (if any) in sync with this new prot.
|
|
|
|
*/
|
2009-01-10 08:13:12 +08:00
|
|
|
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
|
|
|
|
int strict_prot)
|
2008-12-19 03:41:30 +08:00
|
|
|
{
|
|
|
|
int is_ram = 0;
|
2009-02-25 09:35:13 +08:00
|
|
|
int ret;
|
2014-11-03 21:01:59 +08:00
|
|
|
enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot);
|
|
|
|
enum page_cache_mode pcm = want_pcm;
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2009-02-12 03:20:23 +08:00
|
|
|
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2009-02-12 03:20:23 +08:00
|
|
|
/*
|
2009-07-11 00:57:41 +08:00
|
|
|
* reserve_pfn_range() for RAM pages. We do not refcount to keep
|
|
|
|
* track of number of mappings of RAM pages. We can assert that
|
|
|
|
* the type requested matches the type of first page in the range.
|
2009-02-12 03:20:23 +08:00
|
|
|
*/
|
2009-07-11 00:57:41 +08:00
|
|
|
if (is_ram) {
|
2015-05-26 16:28:15 +08:00
|
|
|
if (!pat_enabled())
|
2009-07-11 00:57:41 +08:00
|
|
|
return 0;
|
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
pcm = lookup_memtype(paddr);
|
|
|
|
if (want_pcm != pcm) {
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
|
2009-07-11 00:57:41 +08:00
|
|
|
current->comm, current->pid,
|
2014-11-03 21:01:59 +08:00
|
|
|
cattr_name(want_pcm),
|
2009-07-11 00:57:41 +08:00
|
|
|
(unsigned long long)paddr,
|
2012-05-30 06:06:29 +08:00
|
|
|
(unsigned long long)(paddr + size - 1),
|
2014-11-03 21:01:59 +08:00
|
|
|
cattr_name(pcm));
|
2009-07-11 00:57:41 +08:00
|
|
|
*vma_prot = __pgprot((pgprot_val(*vma_prot) &
|
2014-11-03 21:01:59 +08:00
|
|
|
(~_PAGE_CACHE_MASK)) |
|
|
|
|
cachemode2protval(pcm));
|
2009-07-11 00:57:41 +08:00
|
|
|
}
|
2009-03-13 08:45:27 +08:00
|
|
|
return 0;
|
2009-07-11 00:57:41 +08:00
|
|
|
}
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
|
2008-12-19 03:41:30 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
if (pcm != want_pcm) {
|
x86, pat: Allow ISA memory range uncacheable mapping requests
Max Vozeler reported:
> Bug 13877 - bogl-term broken with CONFIG_X86_PAT=y, works with =n
>
> strace of bogl-term:
> 814 mmap2(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED, 4, 0)
> = -1 EAGAIN (Resource temporarily unavailable)
> 814 write(2, "bogl: mmaping /dev/fb0: Resource temporarily unavailable\n",
> 57) = 57
PAT code maps the ISA memory range as WB in the PAT attribute, so that
fixed range MTRR registers define the actual memory type (UC/WC/WT etc).
But the upper level is_new_memtype_allowed() API checks are failing,
as the request here is for UC and the return tracked type is WB (Tracked type is
WB as MTRR type for this legacy range potentially will be different for each
4k page).
Fix is_new_memtype_allowed() by always succeeding the ISA address range
checks, as the null PAT (WB) and def MTRR fixed range register settings
satisfy the memory type needs of the applications that map the ISA address
range.
Reported-and-Tested-by: Max Vozeler <xam@debian.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-08-18 04:23:50 +08:00
|
|
|
if (strict_prot ||
|
2014-11-03 21:01:59 +08:00
|
|
|
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
|
2009-01-10 08:13:12 +08:00
|
|
|
free_memtype(paddr, paddr + size);
|
2015-05-26 16:28:11 +08:00
|
|
|
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
|
|
|
|
current->comm, current->pid,
|
|
|
|
cattr_name(want_pcm),
|
|
|
|
(unsigned long long)paddr,
|
|
|
|
(unsigned long long)(paddr + size - 1),
|
|
|
|
cattr_name(pcm));
|
2009-01-10 08:13:12 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* We allow returning different type than the one requested in
|
|
|
|
* non strict case.
|
|
|
|
*/
|
|
|
|
*vma_prot = __pgprot((pgprot_val(*vma_prot) &
|
|
|
|
(~_PAGE_CACHE_MASK)) |
|
2014-11-03 21:01:59 +08:00
|
|
|
cachemode2protval(pcm));
|
2008-12-19 03:41:30 +08:00
|
|
|
}
|
|
|
|
|
2014-11-03 21:01:59 +08:00
|
|
|
if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
|
2008-12-19 03:41:30 +08:00
|
|
|
free_memtype(paddr, paddr + size);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal interface to free a range of physical memory.
|
|
|
|
* Frees non RAM regions only.
|
|
|
|
*/
|
|
|
|
static void free_pfn_range(u64 paddr, unsigned long size)
|
|
|
|
{
|
|
|
|
int is_ram;
|
|
|
|
|
2009-02-12 03:20:23 +08:00
|
|
|
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
|
2008-12-19 03:41:30 +08:00
|
|
|
if (is_ram == 0)
|
|
|
|
free_memtype(paddr, paddr + size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-10-09 07:28:29 +08:00
|
|
|
* track_pfn_copy is called when vma that is covering the pfnmap gets
|
2008-12-19 03:41:30 +08:00
|
|
|
* copied through copy_page_range().
|
|
|
|
*
|
|
|
|
* If the vma has a linear pfn mapping for the entire range, we get the prot
|
|
|
|
* from pte and reserve the entire vma range with single reserve_pfn_range call.
|
|
|
|
*/
|
2012-10-09 07:28:29 +08:00
|
|
|
int track_pfn_copy(struct vm_area_struct *vma)
|
2008-12-19 03:41:30 +08:00
|
|
|
{
|
2008-12-24 02:10:40 +08:00
|
|
|
resource_size_t paddr;
|
2008-12-20 05:47:28 +08:00
|
|
|
unsigned long prot;
|
2009-04-09 06:37:16 +08:00
|
|
|
unsigned long vma_size = vma->vm_end - vma->vm_start;
|
2009-01-10 08:13:12 +08:00
|
|
|
pgprot_t pgprot;
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2012-10-09 07:28:34 +08:00
|
|
|
if (vma->vm_flags & VM_PAT) {
|
2008-12-19 03:41:30 +08:00
|
|
|
/*
|
2008-12-20 05:47:28 +08:00
|
|
|
* reserve the whole chunk covered by vma. We need the
|
|
|
|
* starting address and protection from pte.
|
2008-12-19 03:41:30 +08:00
|
|
|
*/
|
2009-04-09 06:37:16 +08:00
|
|
|
if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
|
2008-12-19 03:41:30 +08:00
|
|
|
WARN_ON_ONCE(1);
|
2008-12-20 05:47:28 +08:00
|
|
|
return -EINVAL;
|
2008-12-19 03:41:30 +08:00
|
|
|
}
|
2009-01-10 08:13:12 +08:00
|
|
|
pgprot = __pgprot(prot);
|
|
|
|
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
|
2008-12-19 03:41:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-09-07 23:51:21 +08:00
|
|
|
* prot is passed in as a parameter for the new mapping. If the vma has
|
|
|
|
* a linear pfn mapping for the entire range, or no vma is provided,
|
|
|
|
* reserve the entire pfn + size range with single reserve_pfn_range
|
|
|
|
* call.
|
2008-12-19 03:41:30 +08:00
|
|
|
*/
|
2012-10-09 07:28:29 +08:00
|
|
|
int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
|
2012-10-09 07:28:34 +08:00
|
|
|
unsigned long pfn, unsigned long addr, unsigned long size)
|
2008-12-19 03:41:30 +08:00
|
|
|
{
|
2012-10-09 07:28:23 +08:00
|
|
|
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
2014-11-03 21:01:55 +08:00
|
|
|
enum page_cache_mode pcm;
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2012-10-09 07:28:23 +08:00
|
|
|
/* reserve the whole chunk starting from paddr */
|
2016-09-07 23:51:21 +08:00
|
|
|
if (!vma || (addr == vma->vm_start
|
|
|
|
&& size == (vma->vm_end - vma->vm_start))) {
|
2012-10-09 07:28:34 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = reserve_pfn_range(paddr, size, prot, 0);
|
2016-09-07 23:51:21 +08:00
|
|
|
if (ret == 0 && vma)
|
2012-10-09 07:28:34 +08:00
|
|
|
vma->vm_flags |= VM_PAT;
|
|
|
|
return ret;
|
|
|
|
}
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2015-05-26 16:28:15 +08:00
|
|
|
if (!pat_enabled())
|
2009-07-11 00:57:40 +08:00
|
|
|
return 0;
|
|
|
|
|
2012-10-09 07:28:29 +08:00
|
|
|
/*
|
|
|
|
* For anything smaller than the vma size we set prot based on the
|
|
|
|
* lookup.
|
|
|
|
*/
|
2014-11-03 21:01:55 +08:00
|
|
|
pcm = lookup_memtype(paddr);
|
2012-10-09 07:28:29 +08:00
|
|
|
|
|
|
|
/* Check memtype for the remaining pages */
|
|
|
|
while (size > PAGE_SIZE) {
|
|
|
|
size -= PAGE_SIZE;
|
|
|
|
paddr += PAGE_SIZE;
|
2014-11-03 21:01:55 +08:00
|
|
|
if (pcm != lookup_memtype(paddr))
|
2012-10-09 07:28:29 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-01-26 01:25:15 +08:00
|
|
|
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
|
2014-11-03 21:01:55 +08:00
|
|
|
cachemode2protval(pcm));
|
2012-10-09 07:28:29 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-10-27 01:43:43 +08:00
|
|
|
/*
 * Replace the cache bits of @prot with the memtype looked up for @pfn.
 * Does nothing when PAT is disabled. @vma is not consulted.
 */
void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
{
	enum page_cache_mode pcm;

	if (!pat_enabled())
		return;

	/* Set prot based on lookup */
	pcm = lookup_memtype(pfn_t_to_phys(pfn));
	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
			 cachemode2protval(pcm));
}
|
|
|
|
|
|
|
|
/*
|
2012-10-09 07:28:29 +08:00
|
|
|
* untrack_pfn is called while unmapping a pfnmap for a region.
|
2008-12-19 03:41:30 +08:00
|
|
|
* untrack can be called for a specific region indicated by pfn and size or
|
2012-10-09 07:28:23 +08:00
|
|
|
* can be for the entire vma (in which case pfn, size are zero).
|
2008-12-19 03:41:30 +08:00
|
|
|
*/
|
2012-10-09 07:28:29 +08:00
|
|
|
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
|
|
|
|
unsigned long size)
|
2008-12-19 03:41:30 +08:00
|
|
|
{
|
2008-12-24 02:10:40 +08:00
|
|
|
resource_size_t paddr;
|
2012-10-09 07:28:23 +08:00
|
|
|
unsigned long prot;
|
2008-12-19 03:41:30 +08:00
|
|
|
|
2016-09-07 23:51:21 +08:00
|
|
|
if (vma && !(vma->vm_flags & VM_PAT))
|
2008-12-19 03:41:30 +08:00
|
|
|
return;
|
2012-10-09 07:28:23 +08:00
|
|
|
|
|
|
|
/* free the chunk starting from pfn or the whole chunk */
|
|
|
|
paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
|
|
|
if (!paddr && !size) {
|
|
|
|
if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
|
|
|
|
WARN_ON_ONCE(1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
size = vma->vm_end - vma->vm_start;
|
2008-12-19 03:41:30 +08:00
|
|
|
}
|
2012-10-09 07:28:23 +08:00
|
|
|
free_pfn_range(paddr, size);
|
2016-09-07 23:51:21 +08:00
|
|
|
if (vma)
|
|
|
|
vma->vm_flags &= ~VM_PAT;
|
2008-12-19 03:41:30 +08:00
|
|
|
}
|
|
|
|
|
2015-12-23 08:54:23 +08:00
|
|
|
/*
|
|
|
|
* untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
|
|
|
|
* with the old vma after its pfnmap page table has been removed. The new
|
|
|
|
* vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
|
|
|
|
*/
|
|
|
|
void untrack_pfn_moved(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
vma->vm_flags &= ~VM_PAT;
|
|
|
|
}
|
|
|
|
|
2008-12-19 03:41:32 +08:00
|
|
|
/*
 * pgprot_writecombine - return @prot with the write-combining cache mode
 * bits ORed in.  Bits already present in @prot are not cleared first.
 */
pgprot_t pgprot_writecombine(pgprot_t prot)
{
	return __pgprot(cachemode2protval(_PAGE_CACHE_MODE_WC) |
			pgprot_val(prot));
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
|
2008-12-19 03:41:32 +08:00
|
|
|
|
2015-06-05 00:55:18 +08:00
|
|
|
/*
 * pgprot_writethrough - return @prot with the write-through cache mode
 * bits ORed in.  Bits already present in @prot are not cleared first.
 */
pgprot_t pgprot_writethrough(pgprot_t prot)
{
	return __pgprot(cachemode2protval(_PAGE_CACHE_MODE_WT) |
			pgprot_val(prot));
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
|
|
|
|
|
2008-08-06 22:23:08 +08:00
|
|
|
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
|
2008-07-19 07:08:14 +08:00
|
|
|
|
|
|
|
/*
 * memtype_get_idx - obtain a private copy of the memtype entry at @pos.
 *
 * Allocates a zeroed struct memtype and asks
 * rbt_memtype_copy_nth_element() to fill it for index @pos while holding
 * memtype_lock.  A non-zero return from that helper is treated as "no such
 * entry".
 *
 * Returns the newly allocated copy, which the caller must kfree(), or NULL
 * on allocation failure or when the helper reports failure.
 */
static struct memtype *memtype_get_idx(loff_t pos)
{
	struct memtype *entry;
	int err;

	entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!entry)
		return NULL;

	spin_lock(&memtype_lock);
	err = rbt_memtype_copy_nth_element(entry, pos);
	spin_unlock(&memtype_lock);

	if (err) {
		kfree(entry);
		return NULL;
	}

	return entry;
}
|
|
|
|
|
|
|
|
/*
 * seq_file ->start(): position 0 is reserved for the list header, so it is
 * printed once and the position is advanced to 1 before the entry lookup.
 * Returns the entry copy for *pos, or NULL when there is none.
 */
static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos) {
		seq_puts(seq, "PAT memtype list:\n");
		*pos += 1;
	}

	return memtype_get_idx(*pos);
}
|
|
|
|
|
|
|
|
static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
|
|
|
{
|
|
|
|
++*pos;
|
|
|
|
return memtype_get_idx(*pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void memtype_seq_stop(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static int memtype_seq_show(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
struct memtype *print_entry = (struct memtype *)v;
|
|
|
|
|
|
|
|
seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
|
|
|
|
print_entry->start, print_entry->end);
|
|
|
|
kfree(print_entry);
|
2008-09-30 19:20:45 +08:00
|
|
|
|
2008-07-19 07:08:14 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-04 21:53:09 +08:00
|
|
|
static const struct seq_operations memtype_seq_ops = {
|
2008-07-19 07:08:14 +08:00
|
|
|
.start = memtype_seq_start,
|
|
|
|
.next = memtype_seq_next,
|
|
|
|
.stop = memtype_seq_stop,
|
|
|
|
.show = memtype_seq_show,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int memtype_seq_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
return seq_open(file, &memtype_seq_ops);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct file_operations memtype_fops = {
|
|
|
|
.open = memtype_seq_open,
|
|
|
|
.read = seq_read,
|
|
|
|
.llseek = seq_lseek,
|
|
|
|
.release = seq_release,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Late initcall: when PAT is enabled, create the owner-readable
 * "pat_memtype_list" debugfs file under arch_debugfs_dir.  The result of
 * debugfs_create_file() is not checked; the initcall always returns 0.
 */
static int __init pat_memtype_list_init(void)
{
	if (!pat_enabled())
		return 0;

	debugfs_create_file("pat_memtype_list", S_IRUSR,
			    arch_debugfs_dir, NULL, &memtype_fops);

	return 0;
}

late_initcall(pat_memtype_list_init);
|
|
|
|
|
2008-08-06 22:23:08 +08:00
|
|
|
#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
|