2005-04-17 06:20:36 +08:00
|
|
|
/* Generic MTRR (Memory Type Range Register) driver.
|
|
|
|
|
|
|
|
Copyright (C) 1997-2000 Richard Gooch
|
|
|
|
Copyright (c) 2002 Patrick Mochel
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Library General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Library General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Library General Public
|
|
|
|
License along with this library; if not, write to the Free
|
|
|
|
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
|
|
|
|
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
|
|
|
|
The postal address is:
|
|
|
|
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
|
|
|
|
|
|
|
|
Source: "Pentium Pro Family Developer's Manual, Volume 3:
|
|
|
|
Operating System Writer's Guide" (Intel document number 242692),
|
|
|
|
section 11.11.7
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
|
|
|
|
on 6-7 March 2002.
|
|
|
|
Source: Intel Architecture Software Developers Manual, Volume 3:
|
2005-04-17 06:20:36 +08:00
|
|
|
System Programming Guide; Section 9.11. (1997 edition - PPro).
|
|
|
|
*/
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
#define DEBUG
|
|
|
|
|
|
|
|
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
|
|
|
|
|
2010-07-31 02:46:42 +08:00
|
|
|
#include <linux/stop_machine.h>
|
2009-07-04 10:26:28 +08:00
|
|
|
#include <linux/kvm_para.h>
|
|
|
|
#include <linux/uaccess.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/module.h>
|
2009-07-04 10:26:28 +08:00
|
|
|
#include <linux/mutex.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
2009-07-04 10:26:28 +08:00
|
|
|
#include <linux/sort.h>
|
|
|
|
#include <linux/cpu.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/pci.h>
|
|
|
|
#include <linux/smp.h>
|
2011-03-24 05:15:54 +08:00
|
|
|
#include <linux/syscore_ops.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
#include <asm/processor.h>
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
#include <asm/e820.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/mtrr.h>
|
|
|
|
#include <asm/msr.h>
|
2013-05-14 07:58:40 +08:00
|
|
|
#include <asm/pat.h>
|
2009-07-04 10:26:28 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "mtrr.h"
|
|
|
|
|
2013-05-14 07:58:40 +08:00
|
|
|
/* arch_phys_wc_add returns an MTRR register index plus this offset. */
|
|
|
|
#define MTRR_TO_PHYS_WC_OFFSET 1000
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
u32 num_var_ranges;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-10-09 16:01:52 +08:00
|
|
|
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
|
2006-03-26 17:37:14 +08:00
|
|
|
static DEFINE_MUTEX(mtrr_mutex);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-02-13 20:26:23 +08:00
|
|
|
u64 size_or_mask, size_and_mask;
|
2009-08-22 08:00:02 +08:00
|
|
|
static bool mtrr_aps_delayed_init;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-02-01 03:16:34 +08:00
|
|
|
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-02-01 03:16:34 +08:00
|
|
|
const struct mtrr_ops *mtrr_if;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
static void set_mtrr(unsigned int reg, unsigned long base,
|
|
|
|
unsigned long size, mtrr_type type);
|
|
|
|
|
2010-02-01 03:16:34 +08:00
|
|
|
void set_mtrr_ops(const struct mtrr_ops *ops)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
|
|
|
|
mtrr_ops[ops->vendor] = ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns non-zero if we have the write-combining memory type */
|
|
|
|
static int have_wrcomb(void)
|
|
|
|
{
|
|
|
|
struct pci_dev *dev;
|
2009-07-04 10:26:28 +08:00
|
|
|
|
|
|
|
dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
|
|
|
|
if (dev != NULL) {
|
|
|
|
/*
|
|
|
|
* ServerWorks LE chipsets < rev 6 have problems with
|
|
|
|
* write-combining. Don't allow it and leave room for other
|
|
|
|
* chipsets to be tagged
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
|
2011-07-02 02:42:08 +08:00
|
|
|
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
|
|
|
|
dev->revision <= 5) {
|
|
|
|
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
|
|
|
|
pci_dev_put(dev);
|
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
|
|
|
|
* Intel 450NX errata # 23. Non ascending cacheline evictions to
|
|
|
|
* write combining memory may resulting in data corruption
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
|
|
|
|
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
|
2005-04-17 06:20:36 +08:00
|
|
|
pci_dev_put(dev);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
pci_dev_put(dev);
|
2009-07-04 10:26:28 +08:00
|
|
|
}
|
|
|
|
return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* This function returns the number of variable MTRRs */
|
|
|
|
static void __init set_num_var_ranges(void)
|
|
|
|
{
|
|
|
|
unsigned long config = 0, dummy;
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
if (use_intel())
|
2009-05-14 14:36:12 +08:00
|
|
|
rdmsr(MSR_MTRRcap, config, dummy);
|
2009-07-04 10:26:28 +08:00
|
|
|
else if (is_cpu(AMD))
|
2005-04-17 06:20:36 +08:00
|
|
|
config = 2;
|
|
|
|
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
|
|
|
|
config = 8;
|
2009-07-04 10:26:28 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
num_var_ranges = config & 0xff;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Initialise mtrr_usage_table: each of the first num_var_ranges entries
 * is set to 1.
 */
static void __init init_table(void)
{
	int limit = num_var_ranges;
	int reg = 0;

	while (reg < limit) {
		mtrr_usage_table[reg] = 1;
		reg++;
	}
}
|
|
|
|
|
|
|
|
struct set_mtrr_data {
|
|
|
|
unsigned long smp_base;
|
|
|
|
unsigned long smp_size;
|
|
|
|
unsigned int smp_reg;
|
|
|
|
mtrr_type smp_type;
|
|
|
|
};
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
/**
|
2011-06-24 02:19:29 +08:00
|
|
|
* mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
|
|
|
|
* by all the CPUs.
|
2010-03-06 01:52:52 +08:00
|
|
|
* @info: pointer to mtrr configuration data
|
2009-07-04 10:26:28 +08:00
|
|
|
*
|
|
|
|
* Returns nothing.
|
|
|
|
*/
|
2011-06-24 02:19:29 +08:00
|
|
|
static int mtrr_rendezvous_handler(void *info)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct set_mtrr_data *data = info;
|
|
|
|
|
2011-06-24 02:19:29 +08:00
|
|
|
/*
|
|
|
|
* We use this same function to initialize the mtrrs during boot,
|
|
|
|
* resume, runtime cpu online and on an explicit request to set a
|
|
|
|
* specific MTRR.
|
|
|
|
*
|
|
|
|
* During boot or suspend, the state of the boot cpu's mtrrs has been
|
|
|
|
* saved, and we want to replicate that across all the cpus that come
|
|
|
|
* online (either at the end of boot or resume or during a runtime cpu
|
|
|
|
* online). If we're doing that, @reg is set to something special and on
|
|
|
|
* all the cpu's we do mtrr_if->set_all() (On the logical cpu that
|
|
|
|
* started the boot/resume sequence, this might be a duplicate
|
|
|
|
* set_all()).
|
|
|
|
*/
|
2009-07-04 10:26:28 +08:00
|
|
|
if (data->smp_reg != ~0U) {
|
|
|
|
mtrr_if->set(data->smp_reg, data->smp_base,
|
2005-04-17 06:20:36 +08:00
|
|
|
data->smp_size, data->smp_type);
|
2011-06-24 02:19:29 +08:00
|
|
|
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
|
2005-04-17 06:20:36 +08:00
|
|
|
mtrr_if->set_all();
|
2009-07-04 10:26:28 +08:00
|
|
|
}
|
2010-07-31 02:46:42 +08:00
|
|
|
return 0;
|
2007-11-10 05:39:38 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
 * Decide whether two overlapping regions may carry @type1 and @type2.
 *
 * Uncachable on either side is compatible with anything, and write-through
 * pairs with write-back in either order.  Every other combination,
 * including two identical non-uncachable types, yields 0.
 *
 * Returns 1 when compatible, 0 otherwise.
 */
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE)
		return 1;

	if (type1 == MTRR_TYPE_WRTHROUGH)
		return type2 == MTRR_TYPE_WRBACK;

	if (type1 == MTRR_TYPE_WRBACK)
		return type2 == MTRR_TYPE_WRTHROUGH;

	return 0;
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
|
|
|
* set_mtrr - update mtrrs on all processors
|
|
|
|
* @reg: mtrr in question
|
|
|
|
* @base: mtrr base
|
|
|
|
* @size: mtrr size
|
|
|
|
* @type: mtrr type
|
|
|
|
*
|
|
|
|
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
|
2009-07-04 10:26:28 +08:00
|
|
|
*
|
2010-07-31 02:46:42 +08:00
|
|
|
* 1. Queue work to do the following on all processors:
|
2005-04-17 06:20:36 +08:00
|
|
|
* 2. Disable Interrupts
|
2009-07-04 10:26:28 +08:00
|
|
|
* 3. Wait for all procs to do so
|
2005-04-17 06:20:36 +08:00
|
|
|
* 4. Enter no-fill cache mode
|
|
|
|
* 5. Flush caches
|
|
|
|
* 6. Clear PGE bit
|
|
|
|
* 7. Flush all TLBs
|
|
|
|
* 8. Disable all range registers
|
|
|
|
* 9. Update the MTRRs
|
|
|
|
* 10. Enable all range registers
|
|
|
|
* 11. Flush all TLBs and caches again
|
|
|
|
* 12. Enter normal cache mode and reenable caching
|
2009-07-04 10:26:28 +08:00
|
|
|
* 13. Set PGE
|
2005-04-17 06:20:36 +08:00
|
|
|
* 14. Wait for buddies to catch up
|
|
|
|
* 15. Enable interrupts.
|
2009-07-04 10:26:28 +08:00
|
|
|
*
|
2011-06-24 02:19:29 +08:00
|
|
|
* What does that mean for us? Well, stop_machine() will ensure that
|
|
|
|
* the rendezvous handler is started on each CPU. And in lockstep they
|
|
|
|
* do the state transition of disabling interrupts, updating MTRR's
|
|
|
|
* (the CPU vendors may each do it differently, so we call mtrr_if->set()
|
|
|
|
* callback and let them take care of it.) and enabling interrupts.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
|
|
|
|
* becomes nops.
|
|
|
|
*/
|
2009-07-04 10:26:28 +08:00
|
|
|
static void
|
|
|
|
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-06-24 02:19:29 +08:00
|
|
|
struct set_mtrr_data data = { .smp_reg = reg,
|
|
|
|
.smp_base = base,
|
|
|
|
.smp_size = size,
|
|
|
|
.smp_type = type
|
|
|
|
};
|
2010-07-31 02:46:42 +08:00
|
|
|
|
2011-06-24 02:19:29 +08:00
|
|
|
stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-06-24 02:19:29 +08:00
|
|
|
/*
 * Variant of set_mtrr() that runs the rendezvous through
 * stop_machine_from_inactive_cpu() over cpu_callout_mask rather than
 * stop_machine() over cpu_online_mask.  The arguments are packed into a
 * struct set_mtrr_data in the same way.
 */
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
				      unsigned long size, mtrr_type type)
{
	struct set_mtrr_data request = {
		.smp_base = base,
		.smp_size = size,
		.smp_reg  = reg,
		.smp_type = type,
	};

	stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &request,
				       cpu_callout_mask);
}
|
|
|
|
|
|
|
|
/**
|
2009-07-04 10:26:28 +08:00
|
|
|
* mtrr_add_page - Add a memory type region
|
|
|
|
* @base: Physical base address of region in pages (in units of 4 kB!)
|
|
|
|
* @size: Physical size of region in pages (4 kB)
|
|
|
|
* @type: Type of MTRR desired
|
|
|
|
* @increment: If this is true do usage counting on the region
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* Memory type region registers control the caching on newer Intel and
|
|
|
|
 * non Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
|
|
|
|
* implementation are hidden from the caller, but nevertheless the
|
|
|
|
* caller should expect to need to provide a power of two size on an
|
|
|
|
* equivalent power of two boundary.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* If the region cannot be added either because all regions are in use
|
|
|
|
* or the CPU cannot support it a negative value is returned. On success
|
|
|
|
* the register number for this entry is returned, but should be treated
|
|
|
|
* as a cookie only.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* On a multiprocessor machine the changes are made to all processors.
|
|
|
|
* This is required on x86 by the Intel processors.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* The available types are
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_UNCACHABLE - No caching
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* BUGS: Needs a quiet flag for the cases where drivers do not mind
|
|
|
|
* failures and do not wish system log messages to be sent.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2009-07-04 10:26:28 +08:00
|
|
|
int mtrr_add_page(unsigned long base, unsigned long size,
|
2008-01-30 20:30:31 +08:00
|
|
|
unsigned int type, bool increment)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2009-07-04 10:26:28 +08:00
|
|
|
unsigned long lbase, lsize;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
int i, replace, error;
|
2005-04-17 06:20:36 +08:00
|
|
|
mtrr_type ltype;
|
|
|
|
|
|
|
|
if (!mtrr_if)
|
|
|
|
return -ENXIO;
|
2009-07-04 10:26:28 +08:00
|
|
|
|
|
|
|
error = mtrr_if->validate_add_page(base, size, type);
|
|
|
|
if (error)
|
2005-04-17 06:20:36 +08:00
|
|
|
return error;
|
|
|
|
|
|
|
|
if (type >= MTRR_NUM_TYPES) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: type: %u invalid\n", type);
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
/* If the type is WC, check that this processor supports it */
|
2005-04-17 06:20:36 +08:00
|
|
|
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: your processor doesn't support write-combining\n");
|
2005-04-17 06:20:36 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
if (!size) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: zero sized request\n");
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that does
support cpuid 0x80000008 to get phys_addr. Need to set high 32bits
to all 1s, otherwise will not get correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could bigger enough to be out of boundary. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So when are we going to have a size of more than 44 bits? That is 16TiB.
After the patch we have the right output:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
if ((base | (base + size - 1)) >>
|
|
|
|
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: base or size exceeds the MTRR width\n");
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = -EINVAL;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
replace = -1;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-07-08 08:56:38 +08:00
|
|
|
/* No CPU hotplug when we change MTRR entries */
|
2008-01-26 04:08:02 +08:00
|
|
|
get_online_cpus();
|
2009-07-04 10:26:28 +08:00
|
|
|
|
|
|
|
/* Search for existing MTRR */
|
2006-03-26 17:37:14 +08:00
|
|
|
mutex_lock(&mtrr_mutex);
|
2005-04-17 06:20:36 +08:00
|
|
|
for (i = 0; i < num_var_ranges; ++i) {
|
|
|
|
mtrr_if->get(i, &lbase, &lsize, <ype);
|
2009-07-04 10:26:28 +08:00
|
|
|
if (!lsize || base > lbase + lsize - 1 ||
|
|
|
|
base + size - 1 < lbase)
|
2005-04-17 06:20:36 +08:00
|
|
|
continue;
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
|
|
|
|
* At this point we know there is some kind of
|
|
|
|
* overlap/enclosure
|
|
|
|
*/
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
if (base < lbase || base + size - 1 > lbase + lsize - 1) {
|
2009-07-04 10:26:28 +08:00
|
|
|
if (base <= lbase &&
|
|
|
|
base + size - 1 >= lbase + lsize - 1) {
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
/* New region encloses an existing region */
|
|
|
|
if (type == ltype) {
|
|
|
|
replace = replace == -1 ? i : -2;
|
|
|
|
continue;
|
2009-07-04 10:26:28 +08:00
|
|
|
} else if (types_compatible(type, ltype))
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
continue;
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
|
|
|
|
" 0x%lx000,0x%lx000\n", base, size, lbase,
|
|
|
|
lsize);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
/* New region is enclosed by an existing region */
|
2005-04-17 06:20:36 +08:00
|
|
|
if (ltype != type) {
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
if (types_compatible(type, ltype))
|
2005-04-17 06:20:36 +08:00
|
|
|
continue;
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
|
|
|
|
base, size, mtrr_attrib_to_str(ltype),
|
|
|
|
mtrr_attrib_to_str(type));
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (increment)
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
++mtrr_usage_table[i];
|
2005-04-17 06:20:36 +08:00
|
|
|
error = i;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
/* Search for an empty MTRR */
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
i = mtrr_if->get_free_region(base, size, replace);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (i >= 0) {
|
|
|
|
set_mtrr(i, base, size, type);
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
if (likely(replace < 0)) {
|
|
|
|
mtrr_usage_table[i] = 1;
|
|
|
|
} else {
|
|
|
|
mtrr_usage_table[i] = mtrr_usage_table[replace];
|
2008-01-30 20:30:31 +08:00
|
|
|
if (increment)
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
mtrr_usage_table[i]++;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
if (unlikely(replace != i)) {
|
|
|
|
set_mtrr(replace, 0, 0, 0);
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
mtrr_usage_table[replace] = 0;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
}
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
} else {
|
|
|
|
pr_info("mtrr: no more MTRRs available\n");
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
error = i;
|
|
|
|
out:
|
2006-03-26 17:37:14 +08:00
|
|
|
mutex_unlock(&mtrr_mutex);
|
2008-01-26 04:08:02 +08:00
|
|
|
put_online_cpus();
|
2005-04-17 06:20:36 +08:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2005-06-23 15:08:35 +08:00
|
|
|
static int mtrr_check(unsigned long base, unsigned long size)
|
|
|
|
{
|
|
|
|
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
|
|
|
|
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
|
2005-06-23 15:08:35 +08:00
|
|
|
dump_stack();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
2009-07-04 10:26:28 +08:00
|
|
|
* mtrr_add - Add a memory type region
|
|
|
|
* @base: Physical base address of region
|
|
|
|
* @size: Physical size of region
|
|
|
|
* @type: Type of MTRR desired
|
|
|
|
* @increment: If this is true do usage counting on the region
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* Memory type region registers control the caching on newer Intel and
|
|
|
|
* non Intel processors. This function allows drivers to request an
|
|
|
|
* MTRR be added. The details and hardware specifics of each processor's
|
|
|
|
* implementation are hidden from the caller, but nevertheless the
|
|
|
|
* caller should expect to need to provide a power of two size on an
|
|
|
|
* equivalent power of two boundary.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* If the region cannot be added either because all regions are in use
|
|
|
|
* or the CPU cannot support it a negative value is returned. On success
|
|
|
|
* the register number for this entry is returned, but should be treated
|
|
|
|
* as a cookie only.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* On a multiprocessor machine the changes are made to all processors.
|
|
|
|
* This is required on x86 by the Intel processors.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* The available types are
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_UNCACHABLE - No caching
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* BUGS: Needs a quiet flag for the cases where drivers do not mind
|
|
|
|
* failures and do not wish system log messages to be sent.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2009-07-04 10:26:28 +08:00
|
|
|
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
|
|
|
|
bool increment)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-06-23 15:08:35 +08:00
|
|
|
if (mtrr_check(base, size))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
|
|
|
|
increment);
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
EXPORT_SYMBOL(mtrr_add);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/**
|
2009-07-04 10:26:28 +08:00
|
|
|
* mtrr_del_page - delete a memory type region
|
|
|
|
* @reg: Register returned by mtrr_add
|
|
|
|
* @base: Physical base address
|
|
|
|
* @size: Size of region
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* If register is supplied then base and size are ignored. This is
|
|
|
|
* how drivers should call it.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* Releases an MTRR region. If the usage count drops to zero the
|
|
|
|
* register is freed and the region returns to default state.
|
|
|
|
* On success the register is returned, on failure a negative error
|
|
|
|
* code.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
|
|
|
|
{
|
|
|
|
int i, max;
|
|
|
|
mtrr_type ltype;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
unsigned long lbase, lsize;
|
2005-04-17 06:20:36 +08:00
|
|
|
int error = -EINVAL;
|
|
|
|
|
|
|
|
if (!mtrr_if)
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
max = num_var_ranges;
|
2005-07-08 08:56:38 +08:00
|
|
|
/* No CPU hotplug when we change MTRR entries */
|
2008-01-26 04:08:02 +08:00
|
|
|
get_online_cpus();
|
2006-03-26 17:37:14 +08:00
|
|
|
mutex_lock(&mtrr_mutex);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (reg < 0) {
|
|
|
|
/* Search for existing MTRR */
|
|
|
|
for (i = 0; i < max; ++i) {
|
|
|
|
mtrr_if->get(i, &lbase, &lsize, <ype);
|
|
|
|
if (lbase == base && lsize == size) {
|
|
|
|
reg = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (reg < 0) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
|
|
|
|
base, size);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (reg >= max) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: register: %d too big\n", reg);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
mtrr_if->get(reg, &lbase, &lsize, <ype);
|
|
|
|
if (lsize < 1) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: MTRR %d not used\n", reg);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
if (mtrr_usage_table[reg] < 1) {
|
2009-07-04 10:26:28 +08:00
|
|
|
pr_warning("mtrr: reg: %d has count=0\n", reg);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:33:18 +08:00
|
|
|
if (--mtrr_usage_table[reg] < 1)
|
2005-04-17 06:20:36 +08:00
|
|
|
set_mtrr(reg, 0, 0, 0);
|
|
|
|
error = reg;
|
|
|
|
out:
|
2006-03-26 17:37:14 +08:00
|
|
|
mutex_unlock(&mtrr_mutex);
|
2008-01-26 04:08:02 +08:00
|
|
|
put_online_cpus();
|
2005-04-17 06:20:36 +08:00
|
|
|
return error;
|
|
|
|
}
|
2009-07-04 10:26:28 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
2009-07-04 10:26:28 +08:00
|
|
|
* mtrr_del - delete a memory type region
|
|
|
|
* @reg: Register returned by mtrr_add
|
|
|
|
* @base: Physical base address
|
|
|
|
* @size: Size of region
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* If register is supplied then base and size are ignored. This is
|
|
|
|
* how drivers should call it.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* Releases an MTRR region. If the usage count drops to zero the
|
|
|
|
* register is freed and the region returns to default state.
|
|
|
|
* On success the register is returned, on failure a negative error
|
|
|
|
* code.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2009-07-04 10:26:28 +08:00
|
|
|
int mtrr_del(int reg, unsigned long base, unsigned long size)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-06-23 15:08:35 +08:00
|
|
|
if (mtrr_check(base, size))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mtrr_del);
|
|
|
|
|
2013-05-14 07:58:40 +08:00
|
|
|
/**
|
|
|
|
* arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
|
|
|
|
* @base: Physical base address
|
|
|
|
* @size: Size of region
|
|
|
|
*
|
|
|
|
* If PAT is available, this does nothing. If PAT is unavailable, it
|
|
|
|
* attempts to add a WC MTRR covering size bytes starting at base and
|
|
|
|
* logs an error if this fails.
|
|
|
|
*
|
|
|
|
* Drivers must store the return value to pass to mtrr_del_wc_if_needed,
|
|
|
|
* but drivers should not try to interpret that return value.
|
|
|
|
*/
|
|
|
|
int arch_phys_wc_add(unsigned long base, unsigned long size)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (pat_enabled)
|
|
|
|
return 0; /* Success! (We don't need to do anything.) */
|
|
|
|
|
|
|
|
ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
|
|
|
|
(void *)base, (void *)(base + size - 1));
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return ret + MTRR_TO_PHYS_WC_OFFSET;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(arch_phys_wc_add);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* arch_phys_wc_del - undoes arch_phys_wc_add
|
|
|
|
* @handle: Return value from arch_phys_wc_add
|
|
|
|
*
|
|
|
|
* This cleans up after mtrr_add_wc_if_needed.
|
|
|
|
*
|
|
|
|
* The API guarantees that mtrr_del_wc_if_needed(error code) and
|
|
|
|
* mtrr_del_wc_if_needed(0) do nothing.
|
|
|
|
*/
|
|
|
|
void arch_phys_wc_del(int handle)
|
|
|
|
{
|
|
|
|
if (handle >= 1) {
|
|
|
|
WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
|
|
|
|
mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(arch_phys_wc_del);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
|
|
|
|
* @handle: Return value from arch_phys_wc_add
|
|
|
|
*
|
|
|
|
* This will turn the return value from arch_phys_wc_add into an mtrr
|
|
|
|
* index suitable for debugging.
|
|
|
|
*
|
|
|
|
* Note: There is no legitimate use for this function, except possibly
|
|
|
|
* in printk line. Alas there is an illegitimate use in some ancient
|
|
|
|
* drm ioctls.
|
|
|
|
*/
|
|
|
|
int phys_wc_to_mtrr_index(int handle)
|
|
|
|
{
|
|
|
|
if (handle < MTRR_TO_PHYS_WC_OFFSET)
|
|
|
|
return -1;
|
|
|
|
else
|
|
|
|
return handle - MTRR_TO_PHYS_WC_OFFSET;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
|
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
|
|
|
|
* HACK ALERT!
|
2005-04-17 06:20:36 +08:00
|
|
|
* These should be called implicitly, but we can't yet until all the initcall
|
|
|
|
* stuff is done...
|
|
|
|
*/
|
|
|
|
static void __init init_ifs(void)
|
|
|
|
{
|
2006-12-07 09:14:09 +08:00
|
|
|
#ifndef CONFIG_X86_64
|
2005-04-17 06:20:36 +08:00
|
|
|
amd_init_mtrr();
|
|
|
|
cyrix_init_mtrr();
|
|
|
|
centaur_init_mtrr();
|
2006-12-07 09:14:09 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2005-07-08 08:56:38 +08:00
|
|
|
/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
|
|
|
|
* MTRR driver doesn't require this
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
struct mtrr_value {
|
|
|
|
mtrr_type ltype;
|
|
|
|
unsigned long lbase;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 09:14:09 +08:00
|
|
|
unsigned long lsize;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2009-03-17 07:33:59 +08:00
|
|
|
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-03-24 05:15:54 +08:00
|
|
|
static int mtrr_save(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < num_var_ranges; i++) {
|
2009-07-04 10:26:28 +08:00
|
|
|
mtrr_if->get(i, &mtrr_value[i].lbase,
|
|
|
|
&mtrr_value[i].lsize,
|
|
|
|
&mtrr_value[i].ltype);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-03-24 05:15:54 +08:00
|
|
|
static void mtrr_restore(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < num_var_ranges; i++) {
|
2009-07-04 10:26:28 +08:00
|
|
|
if (mtrr_value[i].lsize) {
|
|
|
|
set_mtrr(i, mtrr_value[i].lbase,
|
|
|
|
mtrr_value[i].lsize,
|
|
|
|
mtrr_value[i].ltype);
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2011-03-24 05:15:54 +08:00
|
|
|
static struct syscore_ops mtrr_syscore_ops = {
|
2005-04-17 06:20:36 +08:00
|
|
|
.suspend = mtrr_save,
|
|
|
|
.resume = mtrr_restore,
|
|
|
|
};
|
|
|
|
|
2009-03-12 11:07:39 +08:00
|
|
|
/*
 * Init-time flag (placed in __initdata).
 * NOTE(review): presumably records whether the MTRR cleanup pass rewrote
 * the registers; its writers and readers are outside this section - confirm.
 */
int __initdata changed_by_mtrr_cleanup;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that does
support cpuid 0x80000008 to get phys_addr. Need to set high 32bits
to all 1s, otherwise will not get correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could bigger enough to be out of boundary. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So When are we going to have size more than 44bits? that is 16TiB.
after patch we have right ouput:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
/*
 * Mask with every bit at or above position (n - PAGE_SHIFT) set and all
 * lower bits clear, where n is a physical address width in bits.
 */
#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
2005-07-08 08:56:38 +08:00
|
|
|
* mtrr_bp_init - initialize mtrrs on the boot CPU
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2009-07-04 10:26:28 +08:00
|
|
|
* This needs to be called early; before any of the other CPUs are
|
2005-04-17 06:20:36 +08:00
|
|
|
* initialized (i.e. before smp_init()).
|
2009-07-04 10:26:28 +08:00
|
|
|
*
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2007-07-21 23:10:39 +08:00
|
|
|
void __init mtrr_bp_init(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2008-04-29 18:52:33 +08:00
|
|
|
u32 phys_addr;
|
2009-07-04 10:26:28 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
init_ifs();
|
|
|
|
|
2008-04-29 18:52:33 +08:00
|
|
|
phys_addr = 32;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
if (cpu_has_mtrr) {
|
|
|
|
mtrr_if = &generic_mtrr_ops;
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that does
support cpuid 0x80000008 to get phys_addr. Need to set high 32bits
to all 1s, otherwise will not get correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could bigger enough to be out of boundary. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So When are we going to have size more than 44bits? that is 16TiB.
after patch we have right ouput:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(36);
|
2005-04-17 06:20:36 +08:00
|
|
|
size_and_mask = 0x00f00000;
|
2008-04-29 18:52:33 +08:00
|
|
|
phys_addr = 36;
|
2005-04-17 06:25:10 +08:00
|
|
|
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
|
|
|
|
* This is an AMD specific MSR, but we assume(hope?) that
|
2012-12-07 04:16:11 +08:00
|
|
|
* Intel will implement it too when they extend the address
|
2009-07-04 10:26:28 +08:00
|
|
|
* bus of the Xeon.
|
|
|
|
*/
|
2005-04-17 06:25:10 +08:00
|
|
|
if (cpuid_eax(0x80000000) >= 0x80000008) {
|
|
|
|
phys_addr = cpuid_eax(0x80000008) & 0xff;
|
2005-11-06 00:25:54 +08:00
|
|
|
/* CPUID workaround for Intel 0F33/0F34 CPU */
|
|
|
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
|
|
|
boot_cpu_data.x86 == 0xF &&
|
|
|
|
boot_cpu_data.x86_model == 0x3 &&
|
|
|
|
(boot_cpu_data.x86_mask == 0x3 ||
|
|
|
|
boot_cpu_data.x86_mask == 0x4))
|
|
|
|
phys_addr = 36;
|
|
|
|
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that does
support cpuid 0x80000008 to get phys_addr. Need to set high 32bits
to all 1s, otherwise will not get correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could bigger enough to be out of boundary. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So When are we going to have size more than 44bits? that is 16TiB.
after patch we have right ouput:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
|
2007-02-13 20:26:23 +08:00
|
|
|
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
|
2005-04-17 06:25:10 +08:00
|
|
|
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
|
|
|
|
boot_cpu_data.x86 == 6) {
|
2009-07-04 10:26:28 +08:00
|
|
|
/*
|
|
|
|
* VIA C* family have Intel style MTRRs,
|
|
|
|
* but don't support PAE
|
|
|
|
*/
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that does
support cpuid 0x80000008 to get phys_addr. Need to set high 32bits
to all 1s, otherwise will not get correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could bigger enough to be out of boundary. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So When are we going to have size more than 44bits? that is 16TiB.
after patch we have right ouput:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(32);
|
2005-04-17 06:25:10 +08:00
|
|
|
size_and_mask = 0;
|
2008-04-29 18:52:33 +08:00
|
|
|
phys_addr = 32;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (boot_cpu_data.x86_vendor) {
|
|
|
|
case X86_VENDOR_AMD:
|
|
|
|
if (cpu_has_k6_mtrr) {
|
|
|
|
/* Pre-Athlon (K6) AMD CPU MTRRs */
|
|
|
|
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one sytem that mtrr range is more then 44bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr report wrong:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that do not
support cpuid 0x80000008 to get phys_addr. We need to set the high 32 bits
to all 1s, otherwise we will not get the correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could be big enough to be out of bounds. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So when are we going to have a size of more than 44 bits? That is 16TiB.
After the patch we have the right output:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(32);
|
2005-04-17 06:20:36 +08:00
|
|
|
size_and_mask = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case X86_VENDOR_CENTAUR:
|
|
|
|
if (cpu_has_centaur_mcr) {
|
|
|
|
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one system whose mtrr range is more than 44 bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr reports wrong values:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that do not
support cpuid 0x80000008 to get phys_addr. We need to set the high 32 bits
to all 1s, otherwise we will not get the correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could be big enough to be out of bounds. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So when are we going to have a size of more than 44 bits? That is 16TiB.
After the patch we have the right output:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(32);
|
2005-04-17 06:20:36 +08:00
|
|
|
size_and_mask = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case X86_VENDOR_CYRIX:
|
|
|
|
if (cpu_has_cyrix_arr) {
|
|
|
|
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
|
x86: Fix /proc/mtrr with base/size more than 44bits
On one system whose mtrr range is more than 44 bits, in dmesg we have
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-DFFFF write-through
[ 0.000000] E0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable
[ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable
[ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through
[ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through
[ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through
[ 0.000000] 8 disabled
[ 0.000000] 9 disabled
but /proc/mtrr reports wrong values:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through
reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
so bit 44 and bit 45 get cut off.
We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr().
1. for base, we miss cast base_lo to 64bit before shifting.
Fix that by adding u64 casting.
2. for size, it only can handle 44 bits aka 32bits + page_shift
Fix that with 64bit mask instead of 32bit mask_lo, then range could be
more than 44bits.
At the same time, we need to update size_or_mask for old cpus that do not
support cpuid 0x80000008 to get phys_addr. We need to set the high 32 bits
to all 1s, otherwise we will not get the correct size for them.
Also fix mtrr_add_page: it should check base and (base + size - 1)
instead of base and size, as base and size could be small but
base + size could be big enough to be out of bounds. We can
use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask.
So when are we going to have a size of more than 44 bits? That is 16TiB.
After the patch we have the right output:
reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable
reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable
reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through
reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through
reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through
reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through
reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through
reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through
reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining
-v2: simply checking in mtrr_add_page according to hpa.
[ hpa: This probably wants to go into -stable only after having sat in
mainline for a bit. It is not a regression. ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-06-14 06:33:35 +08:00
|
|
|
size_or_mask = SIZE_OR_MASK_BITS(32);
|
2005-04-17 06:20:36 +08:00
|
|
|
size_and_mask = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mtrr_if) {
|
|
|
|
set_num_var_ranges();
|
|
|
|
init_table();
|
2008-04-29 18:52:33 +08:00
|
|
|
if (use_intel()) {
|
2005-07-08 08:56:38 +08:00
|
|
|
get_mtrr_state();
|
2008-04-29 18:52:33 +08:00
|
|
|
|
2008-05-02 17:40:22 +08:00
|
|
|
if (mtrr_cleanup(phys_addr)) {
|
|
|
|
changed_by_mtrr_cleanup = 1;
|
2008-04-29 18:52:33 +08:00
|
|
|
mtrr_if->set_all();
|
2008-05-02 17:40:22 +08:00
|
|
|
}
|
2008-04-29 18:52:33 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-08 08:56:38 +08:00
|
|
|
void mtrr_ap_init(void)
|
|
|
|
{
|
2009-08-20 09:05:36 +08:00
|
|
|
if (!use_intel() || mtrr_aps_delayed_init)
|
2005-07-08 08:56:38 +08:00
|
|
|
return;
|
|
|
|
/*
|
2009-07-04 10:26:28 +08:00
|
|
|
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
|
|
|
|
* changed, but this routine will be called in cpu boot time,
|
|
|
|
* holding the lock breaks it.
|
|
|
|
*
|
|
|
|
* This routine is called in two cases:
|
|
|
|
*
|
|
|
|
* 1. very earily time of software resume, when there absolutely
|
|
|
|
* isn't mtrr entry changes;
|
|
|
|
*
|
|
|
|
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
|
|
|
|
* lock to prevent mtrr entry changes
|
2005-07-08 08:56:38 +08:00
|
|
|
*/
|
2011-06-24 02:19:29 +08:00
|
|
|
set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
|
2005-07-08 08:56:38 +08:00
|
|
|
}
|
|
|
|
|
[PATCH] x86: Save the MTRRs of the BSP before booting an AP
Applied fix by Andrew Morton:
http://lkml.org/lkml/2007/4/8/88 - Fix `make headers_check'.
AMD and Intel x86 CPU manuals state that it is the responsibility of
system software to initialize and maintain MTRR consistency across
all processors in Multi-Processing Environments.
Quote from page 188 of the AMD64 System Programming manual (Volume 2):
7.6.5 MTRRs in Multi-Processing Environments
"In multi-processing environments, the MTRRs located in all processors must
characterize memory in the same way. Generally, this means that identical
values are written to the MTRRs used by the processors." (short omission here)
"Failure to do so may result in coherency violations or loss of atomicity.
Processor implementations do not check the MTRR settings in other processors
to ensure consistency. It is the responsibility of system software to
initialize and maintain MTRR consistency across all processors."
Current Linux MTRR code already implements the above in the case that the
BIOS does not properly initialize MTRRs on the secondary processors,
but the case where the fixed-range MTRRs of the boot processor are changed
after Linux started to boot, before the initialisation of a secondary
processor, is not handled yet.
In this case, secondary processors are currently initialized by Linux
with MTRRs which the boot processor had very early, when mtrr_bp_init()
did run, but not with the MTRRs which the boot processor uses at the
time when that secondary processor is actually booted,
causing differing MTRR contents on the secondary processors.
Such situation happens on Acer Ferrari 1000 and 5000 notebooks where the
BIOS enables and sets AMD-specific IORR bits in the fixed-range MTRRs
of the boot processor when it transitions the system into ACPI mode.
The SMI handler of the BIOS does this in SMM, entered while Linux ACPI
code runs acpi_enable().
Other occasions where the SMI handler of the BIOS may change bits in
the MTRRs could occur as well. To initialize newly booted secondary
processors with the fixed-range MTRRs which the boot processor uses
at that time, this patch saves the fixed-range MTRRs of the boot
processor before new secondary processors are started. When the
secondary processors run their Linux initialisation code, their
fixed-range MTRRs will be updated with the saved fixed-range MTRRs.
If CONFIG_MTRR is not set, we define mtrr_save_state
as an empty statement because there is nothing to do.
Possible TODOs:
*) CPU-hotplugging outside of SMP suspend/resume is not yet tested
with this patch.
*) If, even in this case, an AP never runs i386/do_boot_cpu or x86_64/cpu_up,
then the calls to mtrr_save_state() could be replaced by calls to
mtrr_save_fixed_ranges(NULL) and mtrr_save_state() would not be
needed.
That would need either verification of the CPU-hotplug code or
at least a test on a >2 CPU machine.
*) The MTRRs of other running processors are not yet checked at this
time but it might be interesting to synchronize the MTRRs of all
processors before booting. That would be an incremental patch,
but of rather low priority since there is no machine known so
far which would require this.
AK: moved prototypes on x86-64 around to fix warnings
Signed-off-by: Bernhard Kaindl <bk@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Dave Jones <davej@codemonkey.org.uk>
2007-05-03 01:27:17 +08:00
|
|
|
/**
|
2012-11-14 03:32:48 +08:00
|
|
|
* Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
|
[PATCH] x86: Save the MTRRs of the BSP before booting an AP
Applied fix by Andew Morton:
http://lkml.org/lkml/2007/4/8/88 - Fix `make headers_check'.
AMD and Intel x86 CPU manuals state that it is the responsibility of
system software to initialize and maintain MTRR consistency across
all processors in Multi-Processing Environments.
Quote from page 188 of the AMD64 System Programming manual (Volume 2):
7.6.5 MTRRs in Multi-Processing Environments
"In multi-processing environments, the MTRRs located in all processors must
characterize memory in the same way. Generally, this means that identical
values are written to the MTRRs used by the processors." (short omission here)
"Failure to do so may result in coherency violations or loss of atomicity.
Processor implementations do not check the MTRR settings in other processors
to ensure consistency. It is the responsibility of system software to
initialize and maintain MTRR consistency across all processors."
Current Linux MTRR code already implements the above in the case that the
BIOS does not properly initialize MTRRs on the secondary processors,
but the case where the fixed-range MTRRs of the boot processor are changed
after Linux started to boot, before the initialsation of a secondary
processor, is not handled yet.
In this case, secondary processors are currently initialized by Linux
with MTRRs which the boot processor had very early, when mtrr_bp_init()
did run, but not with the MTRRs which the boot processor uses at the
time when that secondary processors is actually booted,
causing differing MTRR contents on the secondary processors.
Such situation happens on Acer Ferrari 1000 and 5000 notebooks where the
BIOS enables and sets AMD-specific IORR bits in the fixed-range MTRRs
of the boot processor when it transitions the system into ACPI mode.
The SMI handler of the BIOS does this in SMM, entered while Linux ACPI
code runs acpi_enable().
Other occasions where the SMI handler of the BIOS may change bits in
the MTRRs could occur as well. To initialize newly booted secodary
processors with the fixed-range MTRRs which the boot processor uses
at that time, this patch saves the fixed-range MTRRs of the boot
processor before new secondary processors are started. When the
secondary processors run their Linux initialisation code, their
fixed-range MTRRs will be updated with the saved fixed-range MTRRs.
If CONFIG_MTRR is not set, we define mtrr_save_state
as an empty statement because there is nothing to do.
Possible TODOs:
*) CPU-hotplugging outside of SMP suspend/resume is not yet tested
with this patch.
*) If, even in this case, an AP never runs i386/do_boot_cpu or x86_64/cpu_up,
then the calls to mtrr_save_state() could be replaced by calls to
mtrr_save_fixed_ranges(NULL) and mtrr_save_state() would not be
needed.
That would need either verification of the CPU-hotplug code or
at least a test on a >2 CPU machine.
*) The MTRRs of other running processors are not yet checked at this
time but it might be interesting to syncronize the MTTRs of all
processors before booting. That would be an incremental patch,
but of rather low priority since there is no machine known so
far which would require this.
AK: moved prototypes on x86-64 around to fix warnings
Signed-off-by: Bernhard Kaindl <bk@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Dave Jones <davej@codemonkey.org.uk>
2007-05-03 01:27:17 +08:00
|
|
|
*/
|
|
|
|
void mtrr_save_state(void)
|
|
|
|
{
|
2012-11-14 03:32:48 +08:00
|
|
|
int first_cpu;
|
|
|
|
|
|
|
|
get_online_cpus();
|
|
|
|
first_cpu = cpumask_first(cpu_online_mask);
|
|
|
|
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
|
|
|
|
put_online_cpus();
|
[PATCH] x86: Save the MTRRs of the BSP before booting an AP
Applied fix by Andew Morton:
http://lkml.org/lkml/2007/4/8/88 - Fix `make headers_check'.
AMD and Intel x86 CPU manuals state that it is the responsibility of
system software to initialize and maintain MTRR consistency across
all processors in Multi-Processing Environments.
Quote from page 188 of the AMD64 System Programming manual (Volume 2):
7.6.5 MTRRs in Multi-Processing Environments
"In multi-processing environments, the MTRRs located in all processors must
characterize memory in the same way. Generally, this means that identical
values are written to the MTRRs used by the processors." (short omission here)
"Failure to do so may result in coherency violations or loss of atomicity.
Processor implementations do not check the MTRR settings in other processors
to ensure consistency. It is the responsibility of system software to
initialize and maintain MTRR consistency across all processors."
Current Linux MTRR code already implements the above in the case that the
BIOS does not properly initialize MTRRs on the secondary processors,
but the case where the fixed-range MTRRs of the boot processor are changed
after Linux started to boot, before the initialsation of a secondary
processor, is not handled yet.
In this case, secondary processors are currently initialized by Linux
with MTRRs which the boot processor had very early, when mtrr_bp_init()
did run, but not with the MTRRs which the boot processor uses at the
time when that secondary processors is actually booted,
causing differing MTRR contents on the secondary processors.
Such situation happens on Acer Ferrari 1000 and 5000 notebooks where the
BIOS enables and sets AMD-specific IORR bits in the fixed-range MTRRs
of the boot processor when it transitions the system into ACPI mode.
The SMI handler of the BIOS does this in SMM, entered while Linux ACPI
code runs acpi_enable().
Other occasions where the SMI handler of the BIOS may change bits in
the MTRRs could occur as well. To initialize newly booted secodary
processors with the fixed-range MTRRs which the boot processor uses
at that time, this patch saves the fixed-range MTRRs of the boot
processor before new secondary processors are started. When the
secondary processors run their Linux initialisation code, their
fixed-range MTRRs will be updated with the saved fixed-range MTRRs.
If CONFIG_MTRR is not set, we define mtrr_save_state
as an empty statement because there is nothing to do.
Possible TODOs:
*) CPU-hotplugging outside of SMP suspend/resume is not yet tested
with this patch.
*) If, even in this case, an AP never runs i386/do_boot_cpu or x86_64/cpu_up,
then the calls to mtrr_save_state() could be replaced by calls to
mtrr_save_fixed_ranges(NULL) and mtrr_save_state() would not be
needed.
That would need either verification of the CPU-hotplug code or
at least a test on a >2 CPU machine.
*) The MTRRs of other running processors are not yet checked at this
time but it might be interesting to syncronize the MTTRs of all
processors before booting. That would be an incremental patch,
but of rather low priority since there is no machine known so
far which would require this.
AK: moved prototypes on x86-64 around to fix warnings
Signed-off-by: Bernhard Kaindl <bk@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Dave Jones <davej@codemonkey.org.uk>
2007-05-03 01:27:17 +08:00
|
|
|
}
|
|
|
|
|
2009-08-20 09:05:36 +08:00
|
|
|
void set_mtrr_aps_delayed_init(void)
|
|
|
|
{
|
|
|
|
if (!use_intel())
|
|
|
|
return;
|
|
|
|
|
2009-08-22 08:00:02 +08:00
|
|
|
mtrr_aps_delayed_init = true;
|
2009-08-20 09:05:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
x86, mtrr: Avoid MTRR reprogramming on BP during boot on UP platforms
Markus Kohn ran into a hard hang regression on an acer aspire
1310, when acpi is enabled. git bisect showed the following
commit as the bad one that introduced the boot regression.
commit d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3
Author: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed Aug 19 18:05:36 2009 -0700
x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init
Because of the UP configuration of that platform,
native_smp_prepare_cpus() bailed out (in smp_sanity_check())
before doing the set_mtrr_aps_delayed_init()
Further down the boot path, native_smp_cpus_done() will call the
delayed MTRR initialization for the AP's (mtrr_aps_init()) with
mtrr_aps_delayed_init not set. This resulted in the boot
processor reprogramming its MTRR's to the values seen during the
start of the OS boot. While this is not needed ideally, this
shouldn't have caused any side-effects. This is because the
reprogramming of MTRR's (set_mtrr_state() that gets called via
set_mtrr()) will check if the live register contents are
different from what is being asked to write and will do the actual
write only if they are different.
BP's mtrr state is read during the start of the OS boot and
typically nothing would have changed when we ask to reprogram it
on BP again because of the above scenario on an UP platform. So
on a normal UP platform no reprogramming of BP MTRR MSR's
happens and all is well.
However, on this platform, bios seems to be modifying the fixed
mtrr range registers between the start of OS boot and when we
double check the live registers for reprogramming BP MTRR
registers. And as the live registers are modified, we end up
reprogramming the MTRR's to the state seen during the start of
the OS boot.
During ACPI initialization, something in the bios (probably the smi
handler?) doesn't like this and the result is a hard lockup.
We didn't see this boot hang issue on this platform before the
commit d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3, because only
the AP's (if any) will program its MTRR's to the value that BP
had at the start of the OS boot.
Fix this issue by checking mtrr_aps_delayed_init before
continuing further in the mtrr_aps_init(). Now, only AP's (if
any) will program its MTRR's to the BP values during boot.
Addresses https://bugzilla.novell.com/show_bug.cgi?id=623393
[ By the way, this behavior of the bios modifying MTRR's after the start
of the OS boot is not common and the kernel is not prepared to
handle this situation well. Irrespective of this issue, during
suspend/resume, linux kernel will try to reprogram the BP's MTRR values
to the values seen during the start of the OS boot. So suspend/resume might
be already broken on this platform for all linux kernel versions. ]
Reported-and-bisected-by: Markus Kohn <jabber@gmx.org>
Tested-by: Markus Kohn <jabber@gmx.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Thomas Renninger <trenn@novell.com>
Cc: Rafael Wysocki <rjw@novell.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: stable@kernel.org # [v2.6.32+]
LKML-Reference: <1296694975.4418.402.camel@sbsiddha-MOBL3.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-03 09:02:55 +08:00
|
|
|
* Delayed MTRR initialization for all AP's
|
2009-08-20 09:05:36 +08:00
|
|
|
*/
|
|
|
|
void mtrr_aps_init(void)
|
|
|
|
{
|
|
|
|
if (!use_intel())
|
|
|
|
return;
|
|
|
|
|
x86, mtrr: Avoid MTRR reprogramming on BP during boot on UP platforms
Markus Kohn ran into a hard hang regression on an acer aspire
1310, when acpi is enabled. git bisect showed the following
commit as the bad one that introduced the boot regression.
commit d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3
Author: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed Aug 19 18:05:36 2009 -0700
x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init
Because of the UP configuration of that platform,
native_smp_prepare_cpus() bailed out (in smp_sanity_check())
before doing the set_mtrr_aps_delayed_init()
Further down the boot path, native_smp_cpus_done() will call the
delayed MTRR initialization for the AP's (mtrr_aps_init()) with
mtrr_aps_delayed_init not set. This resulted in the boot
processor reprogramming its MTRR's to the values seen during the
start of the OS boot. While this is not needed ideally, this
shouldn't have caused any side-effects. This is because the
reprogramming of MTRR's (set_mtrr_state() that gets called via
set_mtrr()) will check if the live register contents are
different from what is being asked to write and will do the actual
write only if they are different.
BP's mtrr state is read during the start of the OS boot and
typically nothing would have changed when we ask to reprogram it
on BP again because of the above scenario on an UP platform. So
on a normal UP platform no reprogramming of BP MTRR MSR's
happens and all is well.
However, on this platform, bios seems to be modifying the fixed
mtrr range registers between the start of OS boot and when we
double check the live registers for reprogramming BP MTRR
registers. And as the live registers are modified, we end up
reprogramming the MTRR's to the state seen during the start of
the OS boot.
During ACPI initialization, something in the bios (probably smi
handler?) don't like this fact and results in a hard lockup.
We didn't see this boot hang issue on this platform before the
commit d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3, because only
the AP's (if any) will program its MTRR's to the value that BP
had at the start of the OS boot.
Fix this issue by checking mtrr_aps_delayed_init before
continuing further in the mtrr_aps_init(). Now, only AP's (if
any) will program its MTRR's to the BP values during boot.
Addresses https://bugzilla.novell.com/show_bug.cgi?id=623393
[ By the way, this behavior of the bios modifying MTRR's after the start
of the OS boot is not common and the kernel is not prepared to
handle this situation well. Irrespective of this issue, during
suspend/resume, linux kernel will try to reprogram the BP's MTRR values
to the values seen during the start of the OS boot. So suspend/resume might
be already broken on this platform for all linux kernel versions. ]
Reported-and-bisected-by: Markus Kohn <jabber@gmx.org>
Tested-by: Markus Kohn <jabber@gmx.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Thomas Renninger <trenn@novell.com>
Cc: Rafael Wysocki <rjw@novell.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: stable@kernel.org # [v2.6.32+]
LKML-Reference: <1296694975.4418.402.camel@sbsiddha-MOBL3.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-03 09:02:55 +08:00
|
|
|
/*
|
|
|
|
* Check if someone has requested the delay of AP MTRR initialization,
|
|
|
|
* by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
|
|
|
|
* then we are done.
|
|
|
|
*/
|
|
|
|
if (!mtrr_aps_delayed_init)
|
|
|
|
return;
|
|
|
|
|
2009-08-20 09:05:36 +08:00
|
|
|
set_mtrr(~0U, 0, 0, 0);
|
2009-08-22 08:00:02 +08:00
|
|
|
mtrr_aps_delayed_init = false;
|
2009-08-20 09:05:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void mtrr_bp_restore(void)
|
|
|
|
{
|
|
|
|
if (!use_intel())
|
|
|
|
return;
|
|
|
|
|
|
|
|
mtrr_if->set_all();
|
|
|
|
}
|
|
|
|
|
2005-07-08 08:56:38 +08:00
|
|
|
/*
 * Final MTRR init step, registered below as a subsys initcall.
 * Always returns 0.
 */
static int __init mtrr_init_finialize(void)
{
	/* No MTRR interface was selected: nothing to finish. */
	if (!mtrr_if)
		return 0;

	if (!use_intel()) {
		/*
		 * The CPU has no MTRR and seems to not support SMP. Such CPUs
		 * have specific drivers, and a tricky method is used to
		 * support suspend/resume for them.
		 *
		 * TBD: is there any system with such a CPU which supports
		 * suspend/resume? If not, this code should be removed.
		 */
		register_syscore_ops(&mtrr_syscore_ops);
	} else if (!changed_by_mtrr_cleanup) {
		/* Only warn when mtrr_cleanup did not change the state. */
		mtrr_state_warn();
	}

	return 0;
}
subsys_initcall(mtrr_init_finialize);
|