Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core', 'x86/cpu', 'x86/fixmap', 'x86/gart', 'x86/kprobes', 'x86/memtest', 'x86/modules', 'x86/nmi', 'x86/pat', 'x86/reboot', 'x86/setup', 'x86/step', 'x86/unify-pci', 'x86/uv', 'x86/xen' and 'xen-64bit' into x86/for-linus
@@ -1206,7 +1206,7 @@ and is between 256 and 4096 characters. It is defined in the file
or
memmap=0x10000$0x18690000

memtest= [KNL,X86_64] Enable memtest
memtest= [KNL,X86] Enable memtest
Format: <integer>
range: 0,4 : pattern number
default : 0 <disable>
@@ -2158,6 +2158,10 @@ and is between 256 and 4096 characters. It is defined in the file
Note that genuine overcurrent events won't be
reported either.

unknown_nmi_panic
[X86-32,X86-64]
Set unknown_nmi_panic=1 early on boot.

usbcore.autosuspend=
[USB] The autosuspend time delay (in seconds) used
for newly-detected USB devices (default 2). This

@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG
config MEMTEST
bool "Memtest"
depends on X86_64
help
This option adds a kernel parameter 'memtest', which allows memtest
to be set.

@@ -362,10 +362,6 @@ config X86_ALIGNMENT_16
def_bool y
depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1

config X86_GOOD_APIC
def_bool y
depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64

config X86_INTEL_USERCOPY
def_bool y
depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2

@@ -289,7 +289,6 @@ config CPA_DEBUG
config OPTIMIZE_INLINING
bool "Allow gcc to uninline functions marked 'inline'"
depends on BROKEN
help
This option determines if the kernel forces gcc to inline the functions
developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -300,5 +299,7 @@ config OPTIMIZE_INLINING
become the default in the future, until then this option is there to
test gcc for this.

If unsure, say N.

endmenu

@@ -167,9 +167,8 @@ void query_edd(void)
* Scan the BIOS-supported hard disks and query EDD
* information...
*/
get_edd_info(devno, &ei);

if (boot_params.eddbuf_entries < EDDMAXNR) {
if (!get_edd_info(devno, &ei)
&& boot_params.eddbuf_entries < EDDMAXNR) {
memcpy(edp, &ei, sizeof ei);
edp++;
boot_params.eddbuf_entries++;

@@ -98,12 +98,6 @@ static void reset_coprocessor(void)
/*
* Set up the GDT
*/
#define GDT_ENTRY(flags, base, limit) \
(((u64)(base & 0xff000000) << 32) | \
((u64)flags << 40) | \
((u64)(limit & 0x00ff0000) << 32) | \
((u64)(base & 0x00ffffff) << 16) | \
((u64)(limit & 0x0000ffff)))

struct gdt_ptr {
u16 len;

@@ -36,6 +36,11 @@
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))

#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
X86_EFLAGS_CF)

asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
@@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
regs->ss |= 3;

err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
/* disable syscall checks */
regs->orig_ax = -1;
@@ -515,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
compat_sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
struct exec_domain *ed = current_thread_info()->exec_domain;
void __user *restorer;
int err = 0;
@@ -538,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;

err |= __put_user((ed && ed->signal_invmap && sig < 32
? ed->signal_invmap[sig] : sig), &frame->sig);
err |= __put_user(sig, &frame->sig);
err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
err |= copy_siginfo_to_user32(&frame->info, info);

@@ -37,6 +37,11 @@
movq %rax,R8(%rsp)
.endm

/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
* the value it wants us to use in the table lookup.
*/
.macro LOAD_ARGS32 offset
movl \offset(%rsp),%r11d
movl \offset+8(%rsp),%r10d
@@ -46,7 +51,6 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
movl \offset+72(%rsp),%eax
.endm

.macro CFI_STARTPROC32 simple
@@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target)
.previous
GET_THREAD_INFO(%r10)
orl $TS_COMPAT,TI_status(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
TI_flags(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
@@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target)
.previous
GET_THREAD_INFO(%r10)
orl $TS_COMPAT,TI_status(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
TI_flags(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cstar_do_call:
@@ -321,6 +323,7 @@ ENTRY(ia32_syscall)
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
/*CFI_REL_OFFSET cs,CS-RIP*/
CFI_REL_OFFSET rip,RIP-RIP
PARAVIRT_ADJUST_EXCEPTION_FRAME
SWAPGS
/*
* No need to follow this irqs on/off section: the syscall
@@ -336,8 +339,7 @@ ENTRY(ia32_syscall)
SAVE_ARGS 0,0,1
GET_THREAD_INFO(%r10)
orl $TS_COMPAT,TI_status(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
TI_flags(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
cmpl $(IA32_NR_syscalls-1),%eax

@@ -102,6 +102,7 @@ obj-$(CONFIG_OLPC) += olpc.o
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o

@@ -9,6 +9,7 @@
#include <linux/bootmem.h>
#include <linux/dmi.h>
#include <linux/cpumask.h>
#include <asm/segment.h>

#include "realmode/wakeup.h"
#include "sleep.h"
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode;
static char temp_stack[10240];
#endif

/* XXX: this macro should move to asm-x86/segment.h and be shared with the
boot code... */
#define GDT_ENTRY(flags, base, limit) \
(((u64)(base & 0xff000000) << 32) | \
((u64)flags << 40) | \
((u64)(limit & 0x00ff0000) << 32) | \
((u64)(base & 0x00ffffff) << 16) | \
((u64)(limit & 0x0000ffff)))

/**
* acpi_save_state_mem - save kernel state
*

@@ -23,7 +23,7 @@
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <asm/proto.h>
#include <asm/gart.h>
#include <asm/iommu.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>

@ -32,21 +32,37 @@
|
|||
#define to_pages(addr, size) \
|
||||
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
|
||||
|
||||
#define EXIT_LOOP_COUNT 10000000
|
||||
|
||||
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
|
||||
|
||||
struct command {
|
||||
/*
|
||||
* general struct to manage commands send to an IOMMU
|
||||
*/
|
||||
struct iommu_cmd {
|
||||
u32 data[4];
|
||||
};
|
||||
|
||||
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
|
||||
struct unity_map_entry *e);
|
||||
|
||||
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
|
||||
static int iommu_has_npcache(struct amd_iommu *iommu)
|
||||
{
|
||||
return iommu->cap & IOMMU_CAP_NPCACHE;
|
||||
}
|
||||
|
||||
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
||||
/****************************************************************************
|
||||
*
|
||||
* IOMMU command queuing functions
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* Writes the command to the IOMMUs command buffer and informs the
|
||||
* hardware about the new command. Must be called with iommu->lock held.
|
||||
*/
|
||||
static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
|
||||
{
|
||||
u32 tail, head;
|
||||
u8 *target;
|
||||
|
@ -63,7 +79,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
||||
/*
|
||||
* General queuing function for commands. Takes iommu->lock and calls
|
||||
* __iommu_queue_command().
|
||||
*/
|
||||
static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
@ -75,16 +95,24 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called whenever we need to ensure that the IOMMU has
|
||||
* completed execution of all commands we sent. It sends a
|
||||
* COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
|
||||
* us about that by writing a value to a physical address we pass with
|
||||
* the command.
|
||||
*/
|
||||
static int iommu_completion_wait(struct amd_iommu *iommu)
|
||||
{
|
||||
int ret;
|
||||
struct command cmd;
|
||||
struct iommu_cmd cmd;
|
||||
volatile u64 ready = 0;
|
||||
unsigned long ready_phys = virt_to_phys(&ready);
|
||||
unsigned long i = 0;
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
|
||||
cmd.data[1] = HIGH_U32(ready_phys);
|
||||
cmd.data[1] = upper_32_bits(ready_phys);
|
||||
cmd.data[2] = 1; /* value written to 'ready' */
|
||||
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
|
||||
|
||||
|
@ -95,15 +123,23 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (!ready)
|
||||
while (!ready && (i < EXIT_LOOP_COUNT)) {
|
||||
++i;
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
|
||||
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Command send function for invalidating a device table entry
|
||||
*/
|
||||
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
|
||||
{
|
||||
struct command cmd;
|
||||
struct iommu_cmd cmd;
|
||||
|
||||
BUG_ON(iommu == NULL);
|
||||
|
||||
|
@ -116,20 +152,23 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
|
|||
return iommu_queue_command(iommu, &cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generic command send function for invalidating TLB entries
|
||||
*/
|
||||
static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
|
||||
u64 address, u16 domid, int pde, int s)
|
||||
{
|
||||
struct command cmd;
|
||||
struct iommu_cmd cmd;
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
address &= PAGE_MASK;
|
||||
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
|
||||
cmd.data[1] |= domid;
|
||||
cmd.data[2] = LOW_U32(address);
|
||||
cmd.data[3] = HIGH_U32(address);
|
||||
if (s)
|
||||
cmd.data[3] = upper_32_bits(address);
|
||||
if (s) /* size bit - we flush more than one 4kb page */
|
||||
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
|
||||
if (pde)
|
||||
if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
|
||||
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
|
||||
|
||||
iommu->need_sync = 1;
|
||||
|
@ -137,6 +176,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
|
|||
return iommu_queue_command(iommu, &cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* TLB invalidation function which is called from the mapping functions.
|
||||
* It invalidates a single PTE if the range to flush is within a single
|
||||
* page. Otherwise it flushes the whole TLB of the IOMMU.
|
||||
*/
|
||||
static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
|
||||
u64 address, size_t size)
|
||||
{
|
||||
|
@ -159,6 +203,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The functions below are used to create the page table mappings for
|
||||
* unity mapped regions.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* Generic mapping functions. It maps a physical address into a DMA
|
||||
* address space. It allocates the page table pages if necessary.
|
||||
* In the future it can be extended to a generic mapping function
|
||||
* supporting all features of AMD IOMMU page tables like level skipping
|
||||
* and full 64 bit address spaces.
|
||||
*/
|
||||
static int iommu_map(struct protection_domain *dom,
|
||||
unsigned long bus_addr,
|
||||
unsigned long phys_addr,
|
||||
|
@ -209,6 +267,10 @@ static int iommu_map(struct protection_domain *dom,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function checks if a specific unity mapping entry is needed for
|
||||
* this specific IOMMU.
|
||||
*/
|
||||
static int iommu_for_unity_map(struct amd_iommu *iommu,
|
||||
struct unity_map_entry *entry)
|
||||
{
|
||||
|
@ -223,6 +285,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Init the unity mappings for a specific IOMMU in the system
|
||||
*
|
||||
* Basically iterates over all unity mapping entries and applies them to
|
||||
* the default domain DMA of that IOMMU if necessary.
|
||||
*/
|
||||
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
|
||||
{
|
||||
struct unity_map_entry *entry;
|
||||
|
@ -239,6 +307,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function actually applies the mapping to the page table of the
|
||||
* dma_ops domain.
|
||||
*/
|
||||
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
|
||||
struct unity_map_entry *e)
|
||||
{
|
||||
|
@ -261,6 +333,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inits the unity mappings required for a specific device
|
||||
*/
|
||||
static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
|
||||
u16 devid)
|
||||
{
|
||||
|
@ -278,12 +353,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The next functions belong to the address allocator for the dma_ops
|
||||
* interface functions. They work like the allocators in the other IOMMU
|
||||
* drivers. It's basically a bitmap which marks the allocated pages in
|
||||
* the aperture. Maybe it could be enhanced in the future to a more
|
||||
* efficient allocator.
|
||||
*
|
||||
****************************************************************************/
|
||||
static unsigned long dma_mask_to_pages(unsigned long mask)
|
||||
{
|
||||
return (mask >> PAGE_SHIFT) +
|
||||
(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* The address allocator core function.
|
||||
*
|
||||
* called with domain->lock held
|
||||
*/
|
||||
static unsigned long dma_ops_alloc_addresses(struct device *dev,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned int pages)
|
||||
|
@ -317,6 +406,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
|
|||
return address;
|
||||
}
|
||||
|
||||
/*
|
||||
* The address free function.
|
||||
*
|
||||
* called with domain->lock held
|
||||
*/
|
||||
static void dma_ops_free_addresses(struct dma_ops_domain *dom,
|
||||
unsigned long address,
|
||||
unsigned int pages)
|
||||
|
@ -325,6 +419,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
|
|||
iommu_area_free(dom->bitmap, address, pages);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The next functions belong to the domain allocation. A domain is
|
||||
* allocated for every IOMMU as the default domain. If device isolation
|
||||
* is enabled, every device gets its own domain. The most important thing
|
||||
* about domains is the page table mapping the DMA address space they
|
||||
* contain.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
static u16 domain_id_alloc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
@ -342,6 +446,10 @@ static u16 domain_id_alloc(void)
|
|||
return id;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used to reserve address ranges in the aperture (e.g. for exclusion
|
||||
* ranges).
|
||||
*/
|
||||
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
|
||||
unsigned long start_page,
|
||||
unsigned int pages)
|
||||
|
@ -382,6 +490,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
|
|||
free_page((unsigned long)p1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a domain, only used if something went wrong in the
|
||||
* allocation path and we need to free an already allocated page table
|
||||
*/
|
||||
static void dma_ops_domain_free(struct dma_ops_domain *dom)
|
||||
{
|
||||
if (!dom)
|
||||
|
@ -396,6 +508,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
|
|||
kfree(dom);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates a new protection domain usable for the dma_ops functions.
|
||||
* It also initializes the page table and the address allocator data
|
||||
* structures required for the dma_ops interface
|
||||
*/
|
||||
static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
|
||||
unsigned order)
|
||||
{
|
||||
|
@ -436,6 +553,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
|
|||
dma_dom->bitmap[0] = 1;
|
||||
dma_dom->next_bit = 0;
|
||||
|
||||
/* Intialize the exclusion range if necessary */
|
||||
if (iommu->exclusion_start &&
|
||||
iommu->exclusion_start < dma_dom->aperture_size) {
|
||||
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
|
||||
|
@ -444,6 +562,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
|
|||
dma_ops_reserve_addresses(dma_dom, startpage, pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* At the last step, build the page tables so we don't need to
|
||||
* allocate page table pages in the dma_ops mapping/unmapping
|
||||
* path.
|
||||
*/
|
||||
num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
|
||||
dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
|
||||
GFP_KERNEL);
|
||||
|
@ -472,6 +595,10 @@ free_dma_dom:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find out the protection domain structure for a given PCI device. This
|
||||
* will give us the pointer to the page table root for example.
|
||||
*/
|
||||
static struct protection_domain *domain_for_device(u16 devid)
|
||||
{
|
||||
struct protection_domain *dom;
|
||||
|
@ -484,6 +611,10 @@ static struct protection_domain *domain_for_device(u16 devid)
|
|||
return dom;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a device is not yet associated with a domain, this function
* assigns it to a domain and makes it visible to the hardware
|
||||
*/
|
||||
static void set_device_domain(struct amd_iommu *iommu,
|
||||
struct protection_domain *domain,
|
||||
u16 devid)
|
||||
|
@ -508,6 +639,19 @@ static void set_device_domain(struct amd_iommu *iommu,
|
|||
iommu->need_sync = 1;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* The next functions belong to the dma_ops mapping/unmapping code.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
* In the dma_ops path we only have the struct device. This function
|
||||
* finds the corresponding IOMMU, the protection domain and the
|
||||
* requestor id for a given device.
|
||||
* If the device is not yet associated with a domain this is also done
|
||||
* in this function.
|
||||
*/
|
||||
static int get_device_resources(struct device *dev,
|
||||
struct amd_iommu **iommu,
|
||||
struct protection_domain **domain,
|
||||
|
@ -520,8 +664,9 @@ static int get_device_resources(struct device *dev,
|
|||
BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
|
||||
|
||||
pcidev = to_pci_dev(dev);
|
||||
_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
|
||||
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
|
||||
|
||||
/* device not translated by any IOMMU in the system? */
|
||||
if (_bdf >= amd_iommu_last_bdf) {
|
||||
*iommu = NULL;
|
||||
*domain = NULL;
|
||||
|
@ -547,6 +692,10 @@ static int get_device_resources(struct device *dev,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the generic map function. It maps one 4kb page at paddr to
|
||||
* the given address in the DMA address space for the domain.
|
||||
*/
|
||||
static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned long address,
|
||||
|
@ -578,6 +727,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
|
|||
return (dma_addr_t)address;
|
||||
}
|
||||
|
||||
/*
|
||||
* The generic unmapping function for one page in the DMA address space.
|
||||
*/
|
||||
static void dma_ops_domain_unmap(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned long address)
|
||||
|
@ -597,6 +749,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
|
|||
*pte = 0ULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function contains common code for mapping of a physically
|
||||
* contiguous memory region into DMA address space. It is used by all
|
||||
* mapping functions provided by this IOMMU driver.
|
||||
* Must be called with the domain lock held.
|
||||
*/
|
||||
static dma_addr_t __map_single(struct device *dev,
|
||||
struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dma_dom,
|
||||
|
@ -628,6 +786,10 @@ out:
|
|||
return address;
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the reverse of the __map_single function. Must be called with
|
||||
* the domain lock held too
|
||||
*/
|
||||
static void __unmap_single(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dma_dom,
|
||||
dma_addr_t dma_addr,
|
||||
|
@ -652,6 +814,9 @@ static void __unmap_single(struct amd_iommu *iommu,
|
|||
dma_ops_free_addresses(dma_dom, dma_addr, pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported map_single function for dma_ops.
|
||||
*/
|
||||
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
|
||||
size_t size, int dir)
|
||||
{
|
||||
|
@ -664,6 +829,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
|
|||
get_device_resources(dev, &iommu, &domain, &devid);
|
||||
|
||||
if (iommu == NULL || domain == NULL)
|
||||
/* device not handled by any AMD IOMMU */
|
||||
return (dma_addr_t)paddr;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
@ -683,6 +849,9 @@ out:
|
|||
return addr;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported unmap_single function for dma_ops.
|
||||
*/
|
||||
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t size, int dir)
|
||||
{
|
||||
|
@ -692,6 +861,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
|
|||
u16 devid;
|
||||
|
||||
if (!get_device_resources(dev, &iommu, &domain, &devid))
|
||||
/* device not handled by any AMD IOMMU */
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
@ -706,6 +876,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
|
|||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a special map_sg function which is used if we should map a
|
||||
* device which is not handled by an AMD IOMMU in the system.
|
||||
*/
|
||||
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
|
@ -720,6 +894,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
|
|||
return nelems;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported map_sg function for dma_ops (handles scatter-gather
|
||||
* lists).
|
||||
*/
|
||||
static int map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
|
@ -775,6 +953,10 @@ unmap:
|
|||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported unmap_sg function for dma_ops (handles scatter-gather
|
||||
* lists).
|
||||
*/
|
||||
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
|
@ -804,6 +986,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
|
|||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported alloc_coherent function for dma_ops.
|
||||
*/
|
||||
static void *alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_addr, gfp_t flag)
|
||||
{
|
||||
|
@ -851,6 +1036,11 @@ out:
|
|||
return virt_addr;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exported free_coherent function for dma_ops.
|
||||
* FIXME: fix the generic x86 DMA layer so that it actually calls that
|
||||
* function.
|
||||
*/
|
||||
static void free_coherent(struct device *dev, size_t size,
|
||||
void *virt_addr, dma_addr_t dma_addr)
|
||||
{
|
||||
|
@ -879,6 +1069,8 @@ free_mem:
|
|||
}
|
||||
|
||||
/*
|
||||
* The function for pre-allocating protection domains.
|
||||
*
|
||||
* If the driver core informs the DMA layer if a driver grabs a device
|
||||
* we don't need to preallocate the protection domains anymore.
|
||||
* For now we have to.
|
||||
|
@ -921,12 +1113,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
|
|||
.unmap_sg = unmap_sg,
|
||||
};
|
||||
|
||||
/*
|
||||
* The function which clues the AMD IOMMU driver into dma_ops.
|
||||
*/
|
||||
int __init amd_iommu_init_dma_ops(void)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
int order = amd_iommu_aperture_order;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* first allocate a default protection domain for every IOMMU we
|
||||
* found in the system. Devices not assigned to any other
|
||||
* protection domain will be assigned to the default one.
|
||||
*/
|
||||
list_for_each_entry(iommu, &amd_iommu_list, list) {
|
||||
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
|
||||
if (iommu->default_dom == NULL)
|
||||
|
@ -936,6 +1136,10 @@ int __init amd_iommu_init_dma_ops(void)
|
|||
goto free_domains;
|
||||
}
|
||||
|
||||
/*
|
||||
* If device isolation is enabled, pre-allocate the protection
|
||||
* domains for each device.
|
||||
*/
|
||||
if (amd_iommu_isolate)
|
||||
prealloc_protection_domains();
|
||||
|
||||
|
@ -947,6 +1151,7 @@ int __init amd_iommu_init_dma_ops(void)
|
|||
gart_iommu_aperture = 0;
|
||||
#endif
|
||||
|
||||
/* Make the driver finally visible to the drivers */
|
||||
dma_ops = &amd_iommu_dma_ops;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -25,20 +25,13 @@
|
|||
#include <asm/pci-direct.h>
|
||||
#include <asm/amd_iommu_types.h>
|
||||
#include <asm/amd_iommu.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
|
||||
/*
|
||||
* definitions for the ACPI scanning code
|
||||
*/
|
||||
#define UPDATE_LAST_BDF(x) do {\
|
||||
if ((x) > amd_iommu_last_bdf) \
|
||||
amd_iommu_last_bdf = (x); \
|
||||
} while (0);
|
||||
|
||||
#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
|
||||
#define PCI_BUS(x) (((x) >> 8) & 0xff)
|
||||
#define IVRS_HEADER_LENGTH 48
|
||||
#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
|
||||
|
||||
#define ACPI_IVHD_TYPE 0x10
|
||||
#define ACPI_IVMD_TYPE_ALL 0x20
|
||||
|
@ -71,6 +64,17 @@
|
|||
#define ACPI_DEVFLAG_LINT1 0x80
|
||||
#define ACPI_DEVFLAG_ATSDIS 0x10000000
|
||||
|
||||
/*
|
||||
* ACPI table definitions
|
||||
*
|
||||
* These data structures are laid over the table to parse the important values
|
||||
* out of it.
|
||||
*/
|
||||
|
||||
/*
|
||||
* structure describing one IOMMU in the ACPI table. Typically followed by one
|
||||
* or more ivhd_entrys.
|
||||
*/
|
||||
struct ivhd_header {
|
||||
u8 type;
|
||||
u8 flags;
|
||||
|
@ -83,6 +87,10 @@ struct ivhd_header {
|
|||
u32 reserved;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* A device entry describing which devices a specific IOMMU translates and
|
||||
* which requestor ids they use.
|
||||
*/
|
||||
struct ivhd_entry {
|
||||
u8 type;
|
||||
u16 devid;
|
||||
|
@ -90,6 +98,10 @@ struct ivhd_entry {
|
|||
u32 ext;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* An AMD IOMMU memory definition structure. It defines things like exclusion
|
||||
* ranges for devices and regions that should be unity mapped.
|
||||
*/
|
||||
struct ivmd_header {
|
||||
u8 type;
|
||||
u8 flags;
|
||||
|
@ -103,22 +115,80 @@ struct ivmd_header {
|
|||
|
||||
static int __initdata amd_iommu_detected;
|
||||
|
||||
u16 amd_iommu_last_bdf;
|
||||
struct list_head amd_iommu_unity_map;
|
||||
unsigned amd_iommu_aperture_order = 26;
|
||||
int amd_iommu_isolate;
|
||||
u16 amd_iommu_last_bdf; /* largest PCI device id we have
|
||||
to handle */
|
||||
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
|
||||
we find in ACPI */
|
||||
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
|
||||
int amd_iommu_isolate; /* if 1, device isolation is enabled */
|
||||
|
||||
struct list_head amd_iommu_list;
|
||||
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
|
||||
system */
|
||||
|
||||
/*
|
||||
* Pointer to the device table which is shared by all AMD IOMMUs
|
||||
* it is indexed by the PCI device id or the HT unit id and contains
|
||||
* information about the domain the device belongs to as well as the
|
||||
* page table root pointer.
|
||||
*/
|
||||
struct dev_table_entry *amd_iommu_dev_table;
|
||||
|
||||
/*
|
||||
* The alias table is a driver specific data structure which contains the
|
||||
* mappings of the PCI device ids to the actual requestor ids on the IOMMU.
|
||||
* More than one device can share the same requestor id.
|
||||
*/
|
||||
u16 *amd_iommu_alias_table;
|
||||
|
||||
/*
|
||||
* The rlookup table is used to find the IOMMU which is responsible
|
||||
* for a specific device. It is also indexed by the PCI device id.
|
||||
*/
|
||||
struct amd_iommu **amd_iommu_rlookup_table;
|
||||
|
||||
/*
|
||||
* The pd table (protection domain table) is used to find the protection domain
|
||||
* data structure a device belongs to. Indexed with the PCI device id too.
|
||||
*/
|
||||
struct protection_domain **amd_iommu_pd_table;
|
||||
|
||||
/*
|
||||
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
|
||||
* to know which ones are already in use.
|
||||
*/
|
||||
unsigned long *amd_iommu_pd_alloc_bitmap;
|
||||
|
||||
static u32 dev_table_size;
|
||||
static u32 alias_table_size;
|
||||
static u32 rlookup_table_size;
|
||||
static u32 dev_table_size; /* size of the device table */
|
||||
static u32 alias_table_size; /* size of the alias table */
|
||||
static u32 rlookup_table_size; /* size of the rlookup table */
|
||||
|
||||
static inline void update_last_devid(u16 devid)
|
||||
{
|
||||
if (devid > amd_iommu_last_bdf)
|
||||
amd_iommu_last_bdf = devid;
|
||||
}
|
||||
|
||||
static inline unsigned long tbl_size(int entry_size)
|
||||
{
|
||||
unsigned shift = PAGE_SHIFT +
|
||||
get_order(amd_iommu_last_bdf * entry_size);
|
||||
|
||||
return 1UL << shift;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* AMD IOMMU MMIO register space handling functions
|
||||
*
|
||||
* These functions are used to program the IOMMU device registers in
|
||||
* MMIO space required for that driver.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* This function sets the exclusion range in the IOMMU. DMA accesses to the
|
||||
* exclusion range are passed through untranslated
|
||||
*/
|
||||
static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
|
||||
{
|
||||
u64 start = iommu->exclusion_start & PAGE_MASK;
|
||||
|
@ -137,6 +207,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
|
|||
&entry, sizeof(entry));
|
||||
}
|
||||
|
||||
/* Programs the physical address of the device table into the IOMMU hardware */
|
||||
static void __init iommu_set_device_table(struct amd_iommu *iommu)
|
||||
{
|
||||
u32 entry;
|
||||
|
@ -149,6 +220,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
|
|||
&entry, sizeof(entry));
|
||||
}
|
||||
|
||||
/* Generic functions to enable/disable certain features of the IOMMU. */
|
||||
static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
|
||||
{
|
||||
u32 ctrl;
|
||||
|
@ -167,6 +239,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
|
|||
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
|
||||
}
|
||||
|
||||
/* Function to enable the hardware */
|
||||
void __init iommu_enable(struct amd_iommu *iommu)
|
||||
{
|
||||
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
|
||||
|
@ -176,6 +249,10 @@ void __init iommu_enable(struct amd_iommu *iommu)
|
|||
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
|
||||
}
|
||||
|
||||
/*
|
||||
* mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
|
||||
* the system has one.
|
||||
*/
|
||||
static u8 * __init iommu_map_mmio_space(u64 address)
|
||||
{
|
||||
u8 *ret;
|
||||
|
@ -199,16 +276,33 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
|
|||
release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The functions below belong to the first pass of AMD IOMMU ACPI table
|
||||
* parsing. In this pass we try to find out the highest device id this
|
||||
* code has to handle. Upon this information the size of the shared data
|
||||
* structures is determined later.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* This function reads the last device id the IOMMU has to handle from the PCI
|
||||
* capability header for this IOMMU
|
||||
*/
|
||||
static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
|
||||
{
|
||||
u32 cap;
|
||||
|
||||
cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
|
||||
UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
|
||||
update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* After reading the highest device id from the IOMMU PCI capability header
|
||||
* this function looks if there is a higher device id defined in the ACPI table
|
||||
*/
|
||||
static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
|
||||
{
|
||||
u8 *p = (void *)h, *end = (void *)h;
|
||||
|
@ -229,7 +323,8 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
|
|||
case IVHD_DEV_RANGE_END:
|
||||
case IVHD_DEV_ALIAS:
|
||||
case IVHD_DEV_EXT_SELECT:
|
||||
UPDATE_LAST_BDF(dev->devid);
|
||||
/* all the above subfield types refer to device ids */
|
||||
update_last_devid(dev->devid);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -242,6 +337,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all IVHD entries in the ACPI table and find the highest device
|
||||
* id which we need to handle. This is the first of three functions which parse
|
||||
* the ACPI table. So we check the checksum here.
|
||||
*/
|
||||
static int __init find_last_devid_acpi(struct acpi_table_header *table)
|
||||
{
|
||||
int i;
|
||||
|
@ -277,19 +377,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The following functions belong to the code path which parses the ACPI table
|
||||
* the second time. In this ACPI parsing iteration we allocate IOMMU specific
|
||||
* data structures, initialize the device/alias/rlookup table and also
|
||||
* basically initialize the hardware.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* Allocates the command buffer. This buffer is per AMD IOMMU. We can
|
||||
* write commands to that buffer later and the IOMMU will execute them
|
||||
* asynchronously
|
||||
*/
|
||||
static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
|
||||
{
|
||||
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
|
||||
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(CMD_BUFFER_SIZE));
|
||||
u64 entry = 0;
|
||||
u64 entry;
|
||||
|
||||
if (cmd_buf == NULL)
|
||||
return NULL;
|
||||
|
||||
iommu->cmd_buf_size = CMD_BUFFER_SIZE;
|
||||
|
||||
memset(cmd_buf, 0, CMD_BUFFER_SIZE);
|
||||
|
||||
entry = (u64)virt_to_phys(cmd_buf);
|
||||
entry |= MMIO_CMD_SIZE_512;
|
||||
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
|
||||
|
@ -302,11 +414,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
|
|||
|
||||
static void __init free_command_buffer(struct amd_iommu *iommu)
|
||||
{
|
||||
if (iommu->cmd_buf)
|
||||
free_pages((unsigned long)iommu->cmd_buf,
|
||||
get_order(CMD_BUFFER_SIZE));
|
||||
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
|
||||
}
|
||||
|
||||
/* sets a specific bit in the device table entry. */
|
||||
static void set_dev_entry_bit(u16 devid, u8 bit)
|
||||
{
|
||||
int i = (bit >> 5) & 0x07;
|
||||
|
@ -315,7 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
|
|||
amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
|
||||
}
|
||||
|
||||
static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
|
||||
/* Writes the specific IOMMU for a device into the rlookup table */
|
||||
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
|
||||
{
|
||||
amd_iommu_rlookup_table[devid] = iommu;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function takes the device specific flags read from the ACPI
|
||||
* table and sets up the device table entry with that information
|
||||
*/
|
||||
static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
|
||||
u16 devid, u32 flags, u32 ext_flags)
|
||||
{
|
||||
if (flags & ACPI_DEVFLAG_INITPASS)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
|
||||
|
@ -331,13 +453,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
|
|||
set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
|
||||
if (flags & ACPI_DEVFLAG_LINT1)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
|
||||
|
||||
set_iommu_for_device(iommu, devid);
|
||||
}
|
||||
|
||||
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
|
||||
{
|
||||
amd_iommu_rlookup_table[devid] = iommu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reads the device exclusion range from ACPI and initializes the IOMMU
* with it
|
||||
*/
|
||||
static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
|
||||
{
|
||||
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
|
||||
|
@ -346,12 +469,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
|
|||
return;
|
||||
|
||||
if (iommu) {
|
||||
/*
|
||||
* We only can configure exclusion ranges per IOMMU, not
|
||||
* per device. But we can enable the exclusion range per
|
||||
* device. This is done here
|
||||
*/
|
||||
set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
|
||||
iommu->exclusion_start = m->range_start;
|
||||
iommu->exclusion_length = m->range_length;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function reads some important data from the IOMMU PCI space and
|
||||
* initializes the driver data structure with it. It reads the hardware
|
||||
* capabilities and the first/last device entries
|
||||
*/
|
||||
static void __init init_iommu_from_pci(struct amd_iommu *iommu)
|
||||
{
|
||||
int bus = PCI_BUS(iommu->devid);
|
||||
|
@ -363,10 +496,16 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
|
|||
iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
|
||||
|
||||
range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
|
||||
iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
|
||||
iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
|
||||
iommu->first_device = calc_devid(MMIO_GET_BUS(range),
|
||||
MMIO_GET_FD(range));
|
||||
iommu->last_device = calc_devid(MMIO_GET_BUS(range),
|
||||
MMIO_GET_LD(range));
|
||||
}
|
||||
|
||||
/*
|
||||
* Takes a pointer to an AMD IOMMU entry in the ACPI table and
|
||||
* initializes the hardware and our data structures with it.
|
||||
*/
|
||||
static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
||||
struct ivhd_header *h)
|
||||
{
|
||||
|
@ -374,7 +513,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
|||
u8 *end = p, flags = 0;
|
||||
u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
|
||||
u32 ext_flags = 0;
|
||||
bool alias = 0;
|
||||
bool alias = false;
|
||||
struct ivhd_entry *e;
|
||||
|
||||
/*
|
||||
|
@ -414,22 +553,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
|||
case IVHD_DEV_ALL:
|
||||
for (dev_i = iommu->first_device;
|
||||
dev_i <= iommu->last_device; ++dev_i)
|
||||
set_dev_entry_from_acpi(dev_i, e->flags, 0);
|
||||
set_dev_entry_from_acpi(iommu, dev_i,
|
||||
e->flags, 0);
|
||||
break;
|
||||
case IVHD_DEV_SELECT:
|
||||
devid = e->devid;
|
||||
set_dev_entry_from_acpi(devid, e->flags, 0);
|
||||
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
|
||||
break;
|
||||
case IVHD_DEV_SELECT_RANGE_START:
|
||||
devid_start = e->devid;
|
||||
flags = e->flags;
|
||||
ext_flags = 0;
|
||||
alias = 0;
|
||||
alias = false;
|
||||
break;
|
||||
case IVHD_DEV_ALIAS:
|
||||
devid = e->devid;
|
||||
devid_to = e->ext >> 8;
|
||||
set_dev_entry_from_acpi(devid, e->flags, 0);
|
||||
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
|
||||
amd_iommu_alias_table[devid] = devid_to;
|
||||
break;
|
||||
case IVHD_DEV_ALIAS_RANGE:
|
||||
|
@ -437,24 +577,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
|||
flags = e->flags;
|
||||
devid_to = e->ext >> 8;
|
||||
ext_flags = 0;
|
||||
alias = 1;
|
||||
alias = true;
|
||||
break;
|
||||
case IVHD_DEV_EXT_SELECT:
|
||||
devid = e->devid;
|
||||
set_dev_entry_from_acpi(devid, e->flags, e->ext);
|
||||
set_dev_entry_from_acpi(iommu, devid, e->flags,
|
||||
e->ext);
|
||||
break;
|
||||
case IVHD_DEV_EXT_SELECT_RANGE:
|
||||
devid_start = e->devid;
|
||||
flags = e->flags;
|
||||
ext_flags = e->ext;
|
||||
alias = 0;
|
||||
alias = false;
|
||||
break;
|
||||
case IVHD_DEV_RANGE_END:
|
||||
devid = e->devid;
|
||||
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
|
||||
if (alias)
|
||||
amd_iommu_alias_table[dev_i] = devid_to;
|
||||
set_dev_entry_from_acpi(
|
||||
set_dev_entry_from_acpi(iommu,
|
||||
amd_iommu_alias_table[dev_i],
|
||||
flags, ext_flags);
|
||||
}
|
||||
|
@ -467,6 +608,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
|||
}
|
||||
}
|
||||
|
||||
/* Initializes the device->iommu mapping for the driver */
|
||||
static int __init init_iommu_devices(struct amd_iommu *iommu)
|
||||
{
|
||||
u16 i;
|
||||
|
@ -494,6 +636,11 @@ static void __init free_iommu_all(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function clues the initialization function for one IOMMU
|
||||
* together and also allocates the command buffer and programs the
|
||||
* hardware. It does NOT enable the IOMMU. This is done afterwards.
|
||||
*/
|
||||
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
|
||||
{
|
||||
spin_lock_init(&iommu->lock);
|
||||
|
@ -521,6 +668,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterates over all IOMMU entries in the ACPI table, allocates the
|
||||
* IOMMU structure and initializes it with init_iommu_one()
|
||||
*/
|
||||
static int __init init_iommu_all(struct acpi_table_header *table)
|
||||
{
|
||||
u8 *p = (u8 *)table, *end = (u8 *)table;
|
||||
|
@ -528,8 +679,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
|
|||
struct amd_iommu *iommu;
|
||||
int ret;
|
||||
|
||||
INIT_LIST_HEAD(&amd_iommu_list);
|
||||
|
||||
end += table->length;
|
||||
p += IVRS_HEADER_LENGTH;
|
||||
|
||||
|
@ -555,6 +704,14 @@ static int __init init_iommu_all(struct acpi_table_header *table)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* The next functions belong to the third pass of parsing the ACPI
|
||||
* table. In this last pass the memory mapping requirements are
|
||||
* gathered (like exclusion and unity mapping ranges).
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
static void __init free_unity_maps(void)
|
||||
{
|
||||
struct unity_map_entry *entry, *next;
|
||||
|
@ -565,6 +722,7 @@ static void __init free_unity_maps(void)
|
|||
}
|
||||
}
|
||||
|
||||
/* called when we find an exclusion range definition in ACPI */
|
||||
static int __init init_exclusion_range(struct ivmd_header *m)
|
||||
{
|
||||
int i;
|
||||
|
@ -588,6 +746,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* called for unity map ACPI definition */
|
||||
static int __init init_unity_map_range(struct ivmd_header *m)
|
||||
{
|
||||
struct unity_map_entry *e = 0;
|
||||
|
@ -619,13 +778,12 @@ static int __init init_unity_map_range(struct ivmd_header *m)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* iterates over all memory definitions we find in the ACPI table */
|
||||
static int __init init_memory_definitions(struct acpi_table_header *table)
|
||||
{
|
||||
u8 *p = (u8 *)table, *end = (u8 *)table;
|
||||
struct ivmd_header *m;
|
||||
|
||||
INIT_LIST_HEAD(&amd_iommu_unity_map);
|
||||
|
||||
end += table->length;
|
||||
p += IVRS_HEADER_LENGTH;
|
||||
|
||||
|
@ -642,6 +800,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function finally enables all IOMMUs found in the system after
|
||||
* they have been initialized
|
||||
*/
|
||||
static void __init enable_iommus(void)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
|
@ -678,6 +840,34 @@ static struct sys_device device_amd_iommu = {
|
|||
.cls = &amd_iommu_sysdev_class,
|
||||
};
|
||||
|
||||
/*
|
||||
* This is the core init function for AMD IOMMU hardware in the system.
|
||||
* This function is called from the generic x86 DMA layer initialization
|
||||
* code.
|
||||
*
|
||||
* This function basically parses the ACPI table for AMD IOMMU (IVRS)
|
||||
* three times:
|
||||
*
|
||||
* 1 pass) Find the highest PCI device id the driver has to handle.
|
||||
* Upon this information the size of the data structures is
|
||||
* determined that needs to be allocated.
|
||||
*
|
||||
* 2 pass) Initialize the data structures just allocated with the
|
||||
* information in the ACPI table about available AMD IOMMUs
|
||||
* in the system. It also maps the PCI devices in the
|
||||
* system to specific IOMMUs
|
||||
*
|
||||
* 3 pass) After the basic data structures are allocated and
|
||||
* initialized we update them with information about memory
|
||||
* remapping requirements parsed out of the ACPI table in
|
||||
* this last pass.
|
||||
*
|
||||
* After that the hardware is initialized and ready to go. In the last
|
||||
* step we do some Linux specific things like registering the driver in
|
||||
* the dma_ops interface and initializing the suspend/resume support
|
||||
* functions. Finally it prints some information about AMD IOMMUs and
|
||||
* the driver state and enables the hardware.
|
||||
*/
|
||||
int __init amd_iommu_init(void)
|
||||
{
|
||||
int i, ret = 0;
|
||||
|
@ -699,14 +889,14 @@ int __init amd_iommu_init(void)
|
|||
if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
|
||||
return -ENODEV;
|
||||
|
||||
dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
|
||||
alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
|
||||
rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
|
||||
dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
|
||||
alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
|
||||
rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
/* Device table - directly used by all IOMMUs */
|
||||
amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(dev_table_size));
|
||||
if (amd_iommu_dev_table == NULL)
|
||||
goto out;
|
||||
|
@ -730,27 +920,23 @@ int __init amd_iommu_init(void)
|
|||
* Protection Domain table - maps devices to protection domains
|
||||
* This table has the same size as the rlookup_table
|
||||
*/
|
||||
amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(rlookup_table_size));
|
||||
if (amd_iommu_pd_table == NULL)
|
||||
goto free;
|
||||
|
||||
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
|
||||
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
|
||||
GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(MAX_DOMAIN_ID/8));
|
||||
if (amd_iommu_pd_alloc_bitmap == NULL)
|
||||
goto free;
|
||||
|
||||
/*
|
||||
* memory is allocated now; initialize the device table with all zeroes
|
||||
* and let all alias entries point to itself
|
||||
* let all alias entries point to itself
|
||||
*/
|
||||
memset(amd_iommu_dev_table, 0, dev_table_size);
|
||||
for (i = 0; i < amd_iommu_last_bdf; ++i)
|
||||
amd_iommu_alias_table[i] = i;
|
||||
|
||||
memset(amd_iommu_pd_table, 0, rlookup_table_size);
|
||||
memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
|
||||
|
||||
/*
|
||||
* never allocate domain 0 because it's used as the non-allocated and
|
||||
* error value placeholder
|
||||
|
@ -795,24 +981,19 @@ out:
|
|||
return ret;
|
||||
|
||||
free:
|
||||
if (amd_iommu_pd_alloc_bitmap)
|
||||
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
|
||||
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
|
||||
|
||||
if (amd_iommu_pd_table)
|
||||
free_pages((unsigned long)amd_iommu_pd_table,
|
||||
get_order(rlookup_table_size));
|
||||
free_pages((unsigned long)amd_iommu_pd_table,
|
||||
get_order(rlookup_table_size));
|
||||
|
||||
if (amd_iommu_rlookup_table)
|
||||
free_pages((unsigned long)amd_iommu_rlookup_table,
|
||||
get_order(rlookup_table_size));
|
||||
free_pages((unsigned long)amd_iommu_rlookup_table,
|
||||
get_order(rlookup_table_size));
|
||||
|
||||
if (amd_iommu_alias_table)
|
||||
free_pages((unsigned long)amd_iommu_alias_table,
|
||||
get_order(alias_table_size));
|
||||
free_pages((unsigned long)amd_iommu_alias_table,
|
||||
get_order(alias_table_size));
|
||||
|
||||
if (amd_iommu_dev_table)
|
||||
free_pages((unsigned long)amd_iommu_dev_table,
|
||||
get_order(dev_table_size));
|
||||
free_pages((unsigned long)amd_iommu_dev_table,
|
||||
get_order(dev_table_size));
|
||||
|
||||
free_iommu_all();
|
||||
|
||||
|
@ -821,6 +1002,13 @@ free:
|
|||
goto out;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Early detect code. This code runs at IOMMU detection time in the DMA
|
||||
* layer. It just looks if there is an IVRS ACPI table to detect AMD
|
||||
* IOMMUs
|
||||
*
|
||||
****************************************************************************/
|
||||
static int __init early_amd_iommu_detect(struct acpi_table_header *table)
|
||||
{
|
||||
return 0;
|
||||
|
@ -828,7 +1016,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
|
|||
|
||||
void __init amd_iommu_detect(void)
|
||||
{
|
||||
if (swiotlb || no_iommu || iommu_detected)
|
||||
if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
|
||||
return;
|
||||
|
||||
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
|
||||
|
@ -841,6 +1029,13 @@ void __init amd_iommu_detect(void)
|
|||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Parsing functions for the AMD IOMMU specific kernel command line
|
||||
* options.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
static int __init parse_amd_iommu_options(char *str)
|
||||
{
|
||||
for (; *str; ++str) {
|
||||
|
@ -853,20 +1048,10 @@ static int __init parse_amd_iommu_options(char *str)
|
|||
|
||||
static int __init parse_amd_iommu_size_options(char *str)
|
||||
{
|
||||
for (; *str; ++str) {
|
||||
if (strcmp(str, "32M") == 0)
|
||||
amd_iommu_aperture_order = 25;
|
||||
if (strcmp(str, "64M") == 0)
|
||||
amd_iommu_aperture_order = 26;
|
||||
if (strcmp(str, "128M") == 0)
|
||||
amd_iommu_aperture_order = 27;
|
||||
if (strcmp(str, "256M") == 0)
|
||||
amd_iommu_aperture_order = 28;
|
||||
if (strcmp(str, "512M") == 0)
|
||||
amd_iommu_aperture_order = 29;
|
||||
if (strcmp(str, "1G") == 0)
|
||||
amd_iommu_aperture_order = 30;
|
||||
}
|
||||
unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
|
||||
|
||||
if ((order > 24) && (order < 31))
|
||||
amd_iommu_aperture_order = order;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <linux/suspend.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/dma.h>
|
||||
|
|
|
@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
|||
/*
|
||||
* Debug level, exported for io_apic.c
|
||||
*/
|
||||
int apic_verbosity;
|
||||
unsigned int apic_verbosity;
|
||||
|
||||
int pic_mode;
|
||||
|
||||
|
@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
|
|||
/* Level triggered for 82489DX */
|
||||
if (!lapic_is_integrated())
|
||||
v |= APIC_LVT_LEVEL_TRIGGER;
|
||||
apic_write_around(APIC_LVT0, v);
|
||||
apic_write(APIC_LVT0, v);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -212,9 +212,6 @@ int lapic_get_maxlvt(void)
|
|||
* this function twice on the boot CPU, once with a bogus timeout
|
||||
* value, second time for real. The other (noncalibrating) CPUs
|
||||
* call this function only once, with the real, calibrated value.
|
||||
*
|
||||
* We do reads before writes even if unnecessary, to get around the
|
||||
* P5 APIC double write bug.
|
||||
*/
|
||||
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
||||
{
|
||||
|
@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
|||
if (!irqen)
|
||||
lvtt_value |= APIC_LVT_MASKED;
|
||||
|
||||
apic_write_around(APIC_LVTT, lvtt_value);
|
||||
apic_write(APIC_LVTT, lvtt_value);
|
||||
|
||||
/*
|
||||
* Divide PICLK by 16
|
||||
*/
|
||||
tmp_value = apic_read(APIC_TDCR);
|
||||
apic_write_around(APIC_TDCR, (tmp_value
|
||||
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
|
||||
| APIC_TDR_DIV_16);
|
||||
apic_write(APIC_TDCR,
|
||||
(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
|
||||
APIC_TDR_DIV_16);
|
||||
|
||||
if (!oneshot)
|
||||
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
|
||||
apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
|||
static int lapic_next_event(unsigned long delta,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
apic_write_around(APIC_TMICT, delta);
|
||||
apic_write(APIC_TMICT, delta);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
|
|||
case CLOCK_EVT_MODE_SHUTDOWN:
|
||||
v = apic_read(APIC_LVTT);
|
||||
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
|
||||
apic_write_around(APIC_LVTT, v);
|
||||
apic_write(APIC_LVTT, v);
|
||||
break;
|
||||
case CLOCK_EVT_MODE_RESUME:
|
||||
/* Nothing to do here */
|
||||
|
@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the boot APIC
|
||||
*
|
||||
* Calibrate and verify the result.
|
||||
*/
|
||||
void __init setup_boot_APIC_clock(void)
|
||||
static int __init calibrate_APIC_clock(void)
|
||||
{
|
||||
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
|
||||
const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
|
||||
|
@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void)
|
|||
long delta, deltapm;
|
||||
int pm_referenced = 0;
|
||||
|
||||
/*
|
||||
* The local apic timer can be disabled via the kernel
|
||||
* commandline or from the CPU detection code. Register the lapic
|
||||
* timer as a dummy clock event source on SMP systems, so the
|
||||
* broadcast mechanism is used. On UP systems simply ignore it.
|
||||
*/
|
||||
if (local_apic_timer_disabled) {
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() > 1) {
|
||||
lapic_clockevent.mult = 1;
|
||||
setup_APIC_timer();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
|
||||
"calibrating APIC timer ...\n");
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
/* Replace the global interrupt handler */
|
||||
|
@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void)
|
|||
calibration_result / (1000000 / HZ),
|
||||
calibration_result % (1000000 / HZ));
|
||||
|
||||
local_apic_timer_verify_ok = 1;
|
||||
|
||||
/*
|
||||
* Do a sanity check on the APIC calibration result
|
||||
*/
|
||||
|
@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void)
|
|||
local_irq_enable();
|
||||
printk(KERN_WARNING
|
||||
"APIC frequency too slow, disabling apic timer\n");
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() > 1)
|
||||
setup_APIC_timer();
|
||||
return;
|
||||
return -1;
|
||||
}
|
||||
|
||||
local_apic_timer_verify_ok = 1;
|
||||
|
||||
/* We trust the pm timer based calibration */
|
||||
if (!pm_referenced) {
|
||||
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
|
||||
|
@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void)
|
|||
if (!local_apic_timer_verify_ok) {
|
||||
printk(KERN_WARNING
|
||||
"APIC timer disabled due to verification failure.\n");
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() == 1)
|
||||
return;
|
||||
} else {
|
||||
/*
|
||||
* If nmi_watchdog is set to IO_APIC, we need the
|
||||
* PIT/HPET going. Otherwise register lapic as a dummy
|
||||
* device.
|
||||
*/
|
||||
if (nmi_watchdog != NMI_IO_APIC)
|
||||
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
|
||||
else
|
||||
printk(KERN_WARNING "APIC timer registered as dummy,"
|
||||
" due to nmi_watchdog=%d!\n", nmi_watchdog);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the boot APIC
|
||||
*
|
||||
* Calibrate and verify the result.
|
||||
*/
|
||||
void __init setup_boot_APIC_clock(void)
|
||||
{
|
||||
/*
|
||||
* The local apic timer can be disabled via the kernel
|
||||
* commandline or from the CPU detection code. Register the lapic
|
||||
* timer as a dummy clock event source on SMP systems, so the
|
||||
* broadcast mechanism is used. On UP systems simply ignore it.
|
||||
*/
|
||||
if (local_apic_timer_disabled) {
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() > 1) {
|
||||
lapic_clockevent.mult = 1;
|
||||
setup_APIC_timer();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
|
||||
"calibrating APIC timer ...\n");
|
||||
|
||||
if (calibrate_APIC_clock()) {
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() > 1)
|
||||
setup_APIC_timer();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If nmi_watchdog is set to IO_APIC, we need the
|
||||
* PIT/HPET going. Otherwise register lapic as a dummy
|
||||
* device.
|
||||
*/
|
||||
if (nmi_watchdog != NMI_IO_APIC)
|
||||
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
|
||||
else
|
||||
printk(KERN_WARNING "APIC timer registered as dummy,"
|
||||
" due to nmi_watchdog=%d!\n", nmi_watchdog);
|
||||
|
||||
/* Setup the lapic or request the broadcast */
|
||||
setup_APIC_timer();
|
||||
}
|
||||
|
@ -693,44 +697,44 @@ void clear_local_APIC(void)
|
|||
*/
|
||||
if (maxlvt >= 3) {
|
||||
v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
|
||||
apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
|
||||
}
|
||||
/*
|
||||
* Careful: we have to set masks only first to deassert
|
||||
* any level-triggered sources.
|
||||
*/
|
||||
v = apic_read(APIC_LVTT);
|
||||
apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
|
||||
v = apic_read(APIC_LVT0);
|
||||
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
||||
v = apic_read(APIC_LVT1);
|
||||
apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
|
||||
if (maxlvt >= 4) {
|
||||
v = apic_read(APIC_LVTPC);
|
||||
apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
	/* let's not touch this if we didn't frob it */
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
if (maxlvt >= 5) {
|
||||
v = apic_read(APIC_LVTTHMR);
|
||||
apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Clean APIC state for other OSs:
|
||||
*/
|
||||
apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
|
||||
apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
|
||||
apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTT, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT1, APIC_LVT_MASKED);
|
||||
if (maxlvt >= 3)
|
||||
apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTERR, APIC_LVT_MASKED);
|
||||
if (maxlvt >= 4)
|
||||
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTPC, APIC_LVT_MASKED);
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
if (maxlvt >= 5)
|
||||
apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
|
||||
#endif
|
||||
/* Integrated APIC (!82489DX) ? */
|
||||
if (lapic_is_integrated()) {
|
||||
|
@ -756,7 +760,7 @@ void disable_local_APIC(void)
|
|||
*/
|
||||
value = apic_read(APIC_SPIV);
|
||||
value &= ~APIC_SPIV_APIC_ENABLED;
|
||||
apic_write_around(APIC_SPIV, value);
|
||||
apic_write(APIC_SPIV, value);
|
||||
|
||||
/*
|
||||
* When LAPIC was disabled by the BIOS and enabled by the kernel,
|
||||
|
@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void)
|
|||
apic_wait_icr_idle();
|
||||
|
||||
apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
|
||||
apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
|
||||
| APIC_DM_INIT);
|
||||
apic_write(APIC_ICR,
|
||||
APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -902,16 +906,16 @@ void __init init_bsp_APIC(void)
|
|||
else
|
||||
value |= APIC_SPIV_FOCUS_DISABLED;
|
||||
value |= SPURIOUS_APIC_VECTOR;
|
||||
apic_write_around(APIC_SPIV, value);
|
||||
apic_write(APIC_SPIV, value);
|
||||
|
||||
/*
|
||||
* Set up the virtual wire mode.
|
||||
*/
|
||||
apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
|
||||
apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
||||
value = APIC_DM_NMI;
|
||||
if (!lapic_is_integrated()) /* 82489DX */
|
||||
value |= APIC_LVT_LEVEL_TRIGGER;
|
||||
apic_write_around(APIC_LVT1, value);
|
||||
apic_write(APIC_LVT1, value);
|
||||
}
|
||||
|
||||
static void __cpuinit lapic_setup_esr(void)
|
||||
|
@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void)
|
|||
|
||||
/* enables sending errors */
|
||||
value = ERROR_APIC_VECTOR;
|
||||
apic_write_around(APIC_LVTERR, value);
|
||||
apic_write(APIC_LVTERR, value);
|
||||
/*
|
||||
* spec says clear errors after enabling vector.
|
||||
*/
|
||||
|
@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void)
|
|||
*/
|
||||
value = apic_read(APIC_TASKPRI);
|
||||
value &= ~APIC_TPRI_MASK;
|
||||
apic_write_around(APIC_TASKPRI, value);
|
||||
apic_write(APIC_TASKPRI, value);
|
||||
|
||||
/*
|
||||
* After a crash, we no longer service the interrupts and a pending
|
||||
|
@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void)
|
|||
* Set spurious IRQ vector
|
||||
*/
|
||||
value |= SPURIOUS_APIC_VECTOR;
|
||||
apic_write_around(APIC_SPIV, value);
|
||||
apic_write(APIC_SPIV, value);
|
||||
|
||||
/*
|
||||
* Set up LVT0, LVT1:
|
||||
|
@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void)
|
|||
apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
apic_write_around(APIC_LVT0, value);
|
||||
apic_write(APIC_LVT0, value);
|
||||
|
||||
/*
|
||||
* only the BP should see the LINT1 NMI signal, obviously.
|
||||
|
@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void)
|
|||
value = APIC_DM_NMI | APIC_LVT_MASKED;
|
||||
if (!integrated) /* 82489DX */
|
||||
value |= APIC_LVT_LEVEL_TRIGGER;
|
||||
apic_write_around(APIC_LVT1, value);
|
||||
apic_write(APIC_LVT1, value);
|
||||
}
|
||||
|
||||
void __cpuinit end_local_APIC_setup(void)
|
||||
|
@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void)
|
|||
/* Disable the local apic timer */
|
||||
value = apic_read(APIC_LVTT);
|
||||
value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
|
||||
apic_write_around(APIC_LVTT, value);
|
||||
apic_write(APIC_LVTT, value);
|
||||
|
||||
setup_apic_nmi_watchdog(NULL);
|
||||
apic_pm_activate();
|
||||
|
@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS];
|
|||
|
||||
int __init APIC_init_uniprocessor(void)
|
||||
{
|
||||
if (disable_apic)
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
|
||||
|
||||
if (!smp_found_config && !cpu_has_apic)
|
||||
return -1;
|
||||
|
||||
|
@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
|
|||
value &= ~APIC_VECTOR_MASK;
|
||||
value |= APIC_SPIV_APIC_ENABLED;
|
||||
value |= 0xf;
|
||||
apic_write_around(APIC_SPIV, value);
|
||||
apic_write(APIC_SPIV, value);
|
||||
|
||||
if (!virt_wire_setup) {
|
||||
/*
|
||||
|
@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
|
|||
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
|
||||
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
|
||||
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
|
||||
apic_write_around(APIC_LVT0, value);
|
||||
apic_write(APIC_LVT0, value);
|
||||
} else {
|
||||
/* Disable LVT0 */
|
||||
apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1449,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
|
|||
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
|
||||
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
|
||||
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
|
||||
apic_write_around(APIC_LVT1, value);
|
||||
apic_write(APIC_LVT1, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1700,7 +1701,7 @@ early_param("lapic", parse_lapic);
|
|||
static int __init parse_nolapic(char *arg)
|
||||
{
|
||||
disable_apic = 1;
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
|
||||
setup_clear_cpu_cap(X86_FEATURE_APIC);
|
||||
return 0;
|
||||
}
|
||||
early_param("nolapic", parse_nolapic);
|
||||
|
|
|
@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
|
|||
/*
|
||||
* Debug level, exported for io_apic.c
|
||||
*/
|
||||
int apic_verbosity;
|
||||
unsigned int apic_verbosity;
|
||||
|
||||
/* Have we found an MP table */
|
||||
int smp_found_config;
|
||||
|
@ -314,7 +314,7 @@ static void setup_APIC_timer(void)
|
|||
|
||||
#define TICK_COUNT 100000000
|
||||
|
||||
static void __init calibrate_APIC_clock(void)
|
||||
static int __init calibrate_APIC_clock(void)
|
||||
{
|
||||
unsigned apic, apic_start;
|
||||
unsigned long tsc, tsc_start;
|
||||
|
@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void)
	clockevent_delta2ns(0xF, &lapic_clockevent);

	calibration_result = result / HZ;

	/*
	 * Do a sanity check on the APIC calibration result
	 */
	if (calibration_result < (1000000 / HZ)) {
		printk(KERN_WARNING
			"APIC frequency too slow, disabling apic timer\n");
		return -1;
	}

	return 0;
}
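For orientation, a tiny sketch of what the sanity check amounts to; the HZ value is assumed (it is config dependent), nothing else is new. calibration_result is APIC timer ticks per jiffy, so the threshold rejects a timer running slower than roughly 1 MHz.

	#include <stdio.h>

	int main(void)
	{
		const long HZ = 250;			/* assumed config value */
		long threshold = 1000000 / HZ;		/* 4000 ticks per jiffy */
		printf("minimum APIC timer rate: %ld Hz\n", threshold * HZ);
		return 0;
	}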
|
||||
|
||||
/*
|
||||
|
@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void)
|
|||
}
|
||||
|
||||
printk(KERN_INFO "Using local APIC timer interrupts.\n");
|
||||
calibrate_APIC_clock();
|
||||
|
||||
/*
|
||||
* Do a sanity check on the APIC calibration result
|
||||
*/
|
||||
if (calibration_result < (1000000 / HZ)) {
|
||||
printk(KERN_WARNING
|
||||
"APIC frequency too slow, disabling apic timer\n");
|
||||
if (calibrate_APIC_clock()) {
|
||||
/* No broadcast on UP ! */
|
||||
if (num_possible_cpus() > 1)
|
||||
setup_APIC_timer();
|
||||
|
@ -1337,7 +1341,7 @@ early_param("apic", apic_set_verbosity);
|
|||
static __init int setup_disableapic(char *str)
|
||||
{
|
||||
disable_apic = 1;
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
|
||||
setup_clear_cpu_cap(X86_FEATURE_APIC);
|
||||
return 0;
|
||||
}
|
||||
early_param("disableapic", setup_disableapic);
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include <asm/ia32.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#define __NO_STUBS 1
|
||||
#undef __SYSCALL
|
||||
#undef _ASM_X86_64_UNISTD_H_
|
||||
|
@ -131,5 +133,14 @@ int main(void)
|
|||
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
|
||||
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
|
||||
OFFSET(BP_version, boot_params, hdr.version);
|
||||
|
||||
BLANK();
|
||||
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
|
||||
#ifdef CONFIG_XEN
|
||||
BLANK();
|
||||
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
|
||||
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
|
||||
#undef ENTRY
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* BIOS run time interface routines.
|
||||
*
|
||||
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <asm/uv/bios.h>

const char *
x86_bios_strerror(long status)
{
	const char *str;
	switch (status) {
	case  0: str = "Call completed without error";	break;
	case -1: str = "Not implemented";		break;
	case -2: str = "Invalid argument";		break;
	case -3: str = "Call completed with error";	break;
	default: str = "Unknown BIOS status code";	break;
	}
	return str;
}

long
x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
		   unsigned long *drift_info)
{
	struct uv_bios_retval isrv;

	BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
	*ticks_per_second = isrv.v0;
	*drift_info = isrv.v1;
	return isrv.status;
}
EXPORT_SYMBOL_GPL(x86_bios_freq_base);
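x86_bios_strerror() pairs naturally with the status codes returned above. A hypothetical caller might look like the sketch below; the function name is invented, and the real user of this interface is uv_rtc_init() later in this diff.

	/* illustrative caller, modelled on the uv_rtc_init() hunk further down */
	static __init long uv_query_rtc_freq(void)
	{
		unsigned long ticks_per_sec, drift;
		long status;

		status = x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
					    &ticks_per_sec, &drift);
		if (status != 0)
			printk(KERN_WARNING "UV BIOS: %s\n",
			       x86_bios_strerror(status));
		return status ? status : (long)ticks_per_sec;
	}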
|
|
@ -24,8 +24,6 @@
|
|||
extern void vide(void);
|
||||
__asm__(".align 4\nvide: ret");
|
||||
|
||||
int force_mwait __cpuinitdata;
|
||||
|
||||
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (cpuid_eax(0x80000000) >= 0x80000007) {
|
||||
|
|
|
@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
|
|||
/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
|
||||
if (c->x86_power & (1<<8))
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
||||
}
|
||||
|
||||
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
||||
|
|
|
@ -131,13 +131,7 @@ static void __init check_popad(void)
|
|||
* (for due to lack of "invlpg" and working WP on a i386)
|
||||
* - In order to run on anything without a TSC, we need to be
|
||||
* compiled for a i486.
|
||||
* - In order to support the local APIC on a buggy Pentium machine,
|
||||
* we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
|
||||
* which happens implicitly if compiled for a Pentium or lower
|
||||
* (unless an advanced selection of CPU features is used) as an
|
||||
* otherwise config implies a properly working local APIC without
|
||||
* the need to do extra reads from the APIC.
|
||||
*/
|
||||
*/
|
||||
|
||||
static void __init check_config(void)
|
||||
{
|
||||
|
@ -151,21 +145,6 @@ static void __init check_config(void)
|
|||
if (boot_cpu_data.x86 == 3)
|
||||
panic("Kernel requires i486+ for 'invlpg' and other features");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we were told we had a good local APIC, check for buggy Pentia,
|
||||
* i.e. all B steppings and the C2 stepping of P54C when using their
|
||||
* integrated APIC (see 11AP erratum in "Pentium Processor
|
||||
* Specification Update").
|
||||
*/
|
||||
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
|
||||
&& cpu_has_apic
|
||||
&& boot_cpu_data.x86 == 5
|
||||
&& boot_cpu_data.x86_model == 2
|
||||
&& (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
|
||||
panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -7,15 +7,13 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/kgdb.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/linkage.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/mce.h>
|
||||
|
@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
|||
c->x86_capability[2] = cpuid_edx(0x80860001);
|
||||
}
|
||||
|
||||
c->extended_cpuid_level = cpuid_eax(0x80000000);
|
||||
if (c->extended_cpuid_level >= 0x80000007)
|
||||
c->x86_power = cpuid_edx(0x80000007);
|
||||
|
||||
|
@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
|||
c->x86_phys_bits = eax & 0xff;
|
||||
}
|
||||
|
||||
/* Assume all 64-bit CPUs support 32-bit syscall */
|
||||
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
||||
|
||||
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
||||
cpu_devs[c->x86_vendor]->c_early_init)
|
||||
cpu_devs[c->x86_vendor]->c_early_init(c);
|
||||
|
||||
validate_pat_support(c);
|
||||
|
||||
/* early_param could clear that, but recall get it set again */
|
||||
if (disable_apic)
|
||||
clear_cpu_cap(c, X86_FEATURE_APIC);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -517,8 +507,7 @@ void pda_init(int cpu)
|
|||
}
|
||||
|
||||
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
||||
DEBUG_STKSZ]
|
||||
__attribute__((section(".bss.page_aligned")));
|
||||
DEBUG_STKSZ] __page_aligned_bss;
|
||||
|
||||
extern asmlinkage void ignore_sysret(void);
|
||||
|
||||
|
|
|
@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
|||
if (cpu_has_bts)
|
||||
ds_init_intel(c);
|
||||
|
||||
	/*
	 * See if we have a good local APIC by checking for buggy Pentia,
	 * i.e. all B steppings and the C2 stepping of P54C when using their
	 * integrated APIC (see 11AP erratum in "Pentium Processor
	 * Specification Update").
	 */
	if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
	    (c->x86_mask < 0x6 || c->x86_mask == 0xb))
		set_cpu_cap(c, X86_FEATURE_11AP);
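For clarity, the test above packs family and model into one value; a quick worked example of that encoding (values taken from the comment, nothing new):

	/* family 5 (Pentium), model 2 (P54C) */
	unsigned int sig = (5 << 8) | (2 << 4);	/* == 0x520 */
	/* steppings 0x0..0x5 are the B steps, stepping 0xb is the C2 step */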
|
||||
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
numaq_tsc_disable();
|
||||
#endif
|
||||
|
|
|
@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
/* The temperature transition interrupt handler setup */
|
||||
h = THERMAL_APIC_VECTOR; /* our delivery vector */
|
||||
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
|
||||
apic_write_around(APIC_LVTTHMR, h);
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
|
||||
|
@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
|
||||
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
|
|
|
@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end)
|
|||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
|
||||
count++;
|
||||
|
||||
printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count);
|
||||
printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
|
||||
count, start, end);
|
||||
for (i = 0; i < count; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
|
||||
|
@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Non-standard memory setup can be specified via this quirk:
|
||||
*/
|
||||
char * (*arch_memory_setup_quirk)(void);
|
||||
|
||||
char *__init default_machine_specific_memory_setup(void)
|
||||
{
|
||||
char *who = "BIOS-e820";
|
||||
|
@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void)
|
|||
|
||||
char *__init __attribute__((weak)) machine_specific_memory_setup(void)
|
||||
{
|
||||
if (arch_memory_setup_quirk) {
|
||||
char *who = arch_memory_setup_quirk();
|
||||
if (x86_quirks->arch_memory_setup) {
|
||||
char *who = x86_quirks->arch_memory_setup();
|
||||
|
||||
if (who)
|
||||
return who;
|
||||
|
@ -1367,24 +1363,3 @@ void __init setup_memory_map(void)
|
|||
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
||||
e820_print_map(who);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
	int i;

	if (slot < 0 || slot >= e820.nr_map)
		return -1;
	for (i = slot; i < e820.nr_map; i++) {
		if (e820.map[i].type != E820_RAM)
			continue;
		break;
	}
	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
		return -1;
	*addr = e820.map[i].addr;
	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
		max_pfn << PAGE_SHIFT) - *addr;
	return i + 1;
}
#endif
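The return value doubles as the cursor for the next lookup (-1 means no further RAM entry), so a caller would walk the E820 RAM ranges roughly as sketched below; the function name is made up for illustration only.

	/* illustrative walker, not part of the patch */
	static void __init print_ram_ranges(void)
	{
		u64 addr, size;
		int slot = 0;

		while ((slot = arch_get_ram_range(slot, &addr, &size)) >= 0)
			printk(KERN_DEBUG "RAM: %010llx - %010llx\n",
			       (unsigned long long)addr,
			       (unsigned long long)(addr + size));
	}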
|
||||
|
|
|
@ -16,10 +16,7 @@
|
|||
#include <asm/dma.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
#include <asm/gart.h>
|
||||
#endif
|
||||
#include <asm/iommu.h>
|
||||
|
||||
static void __init fix_hypertransport_config(int num, int slot, int func)
|
||||
{
|
||||
|
|
|
@ -332,7 +332,7 @@ sysenter_past_esp:
|
|||
GET_THREAD_INFO(%ebp)
|
||||
|
||||
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
|
||||
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
||||
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
||||
jnz syscall_trace_entry
|
||||
cmpl $(nr_syscalls), %eax
|
||||
jae syscall_badsys
|
||||
|
@ -370,7 +370,7 @@ ENTRY(system_call)
|
|||
GET_THREAD_INFO(%ebp)
|
||||
# system call tracing in operation / emulation
|
||||
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
|
||||
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
||||
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
||||
jnz syscall_trace_entry
|
||||
cmpl $(nr_syscalls), %eax
|
||||
jae syscall_badsys
|
||||
|
@ -383,10 +383,6 @@ syscall_exit:
|
|||
# setting need_resched or sigpending
|
||||
# between sampling and the iret
|
||||
TRACE_IRQS_OFF
|
||||
testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
|
||||
jz no_singlestep
|
||||
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
|
||||
no_singlestep:
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testw $_TIF_ALLWORK_MASK, %cx # current->work
|
||||
jne syscall_exit_work
|
||||
|
@ -514,12 +510,8 @@ END(work_pending)
|
|||
syscall_trace_entry:
|
||||
movl $-ENOSYS,PT_EAX(%esp)
|
||||
movl %esp, %eax
|
||||
xorl %edx,%edx
|
||||
call do_syscall_trace
|
||||
cmpl $0, %eax
|
||||
jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
|
||||
# so must skip actual syscall
|
||||
movl PT_ORIG_EAX(%esp), %eax
|
||||
call syscall_trace_enter
|
||||
/* What it returned is what we'll actually use. */
|
||||
cmpl $(nr_syscalls), %eax
|
||||
jnae syscall_call
|
||||
jmp syscall_exit
|
||||
|
@ -528,14 +520,13 @@ END(syscall_trace_entry)
|
|||
# perform syscall exit tracing
|
||||
ALIGN
|
||||
syscall_exit_work:
|
||||
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
|
||||
testb $_TIF_WORK_SYSCALL_EXIT, %cl
|
||||
jz work_pending
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
|
||||
ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
|
||||
# schedule() instead
|
||||
movl %esp, %eax
|
||||
movl $1, %edx
|
||||
call do_syscall_trace
|
||||
call syscall_trace_leave
|
||||
jmp resume_userspace
|
||||
END(syscall_exit_work)
|
||||
CFI_ENDPROC
|
||||
|
@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper)
|
|||
ENTRY(xen_sysenter_target)
|
||||
RING0_INT_FRAME
|
||||
addl $5*4, %esp /* remove xen-provided frame */
|
||||
CFI_ADJUST_CFA_OFFSET -5*4
|
||||
jmp sysenter_past_esp
|
||||
CFI_ENDPROC
|
||||
|
||||
|
|
|
@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
|
|||
movq %rcx,RIP-ARGOFFSET(%rsp)
|
||||
CFI_REL_OFFSET rip,RIP-ARGOFFSET
|
||||
GET_THREAD_INFO(%rcx)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
|
||||
TI_flags(%rcx)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
|
||||
jnz tracesys
|
||||
cmpq $__NR_syscall_max,%rax
|
||||
ja badsys
|
||||
|
@ -430,7 +429,12 @@ tracesys:
|
|||
FIXUP_TOP_OF_STACK %rdi
|
||||
movq %rsp,%rdi
|
||||
call syscall_trace_enter
|
||||
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
|
||||
/*
|
||||
* Reload arg registers from stack in case ptrace changed them.
|
||||
* We don't reload %rax because syscall_trace_enter() returned
|
||||
* the value it wants us to use in the table lookup.
|
||||
*/
|
||||
LOAD_ARGS ARGOFFSET, 1
|
||||
RESTORE_REST
|
||||
cmpq $__NR_syscall_max,%rax
|
||||
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
|
||||
|
@ -483,7 +487,7 @@ int_very_careful:
|
|||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
SAVE_REST
|
||||
/* Check for syscall exit trace */
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
|
||||
testl $_TIF_WORK_SYSCALL_EXIT,%edx
|
||||
jz int_signal
|
||||
pushq %rdi
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
|
@ -491,7 +495,7 @@ int_very_careful:
|
|||
call syscall_trace_leave
|
||||
popq %rdi
|
||||
CFI_ADJUST_CFA_OFFSET -8
|
||||
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
|
||||
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
|
||||
jmp int_restore_rest
|
||||
|
||||
int_signal:
|
||||
|
@ -1189,6 +1193,7 @@ END(device_not_available)
|
|||
/* runs on exception stack */
|
||||
KPROBE_ENTRY(debug)
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq $0
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
paranoidentry do_debug, DEBUG_STACK
|
||||
|
@ -1198,6 +1203,7 @@ KPROBE_END(debug)
|
|||
/* runs on exception stack */
|
||||
KPROBE_ENTRY(nmi)
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq $-1
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
paranoidentry do_nmi, 0, 0
|
||||
|
@ -1211,6 +1217,7 @@ KPROBE_END(nmi)
|
|||
|
||||
KPROBE_ENTRY(int3)
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq $0
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
paranoidentry do_int3, DEBUG_STACK
|
||||
|
@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun)
|
|||
/* runs on exception stack */
|
||||
ENTRY(double_fault)
|
||||
XCPT_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
paranoidentry do_double_fault
|
||||
jmp paranoid_exit1
|
||||
CFI_ENDPROC
|
||||
|
@ -1253,6 +1261,7 @@ END(segment_not_present)
|
|||
/* runs on exception stack */
|
||||
ENTRY(stack_segment)
|
||||
XCPT_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
paranoidentry do_stack_segment
|
||||
jmp paranoid_exit1
|
||||
CFI_ENDPROC
|
||||
|
@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug)
|
|||
/* runs on exception stack */
|
||||
ENTRY(machine_check)
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq $0
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
paranoidentry do_machine_check
|
||||
|
@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret)
|
|||
sysret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(ignore_sysret)
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
ENTRY(xen_hypervisor_callback)
|
||||
zeroentry xen_do_hypervisor_callback
|
||||
END(xen_hypervisor_callback)
|
||||
|
||||
/*
|
||||
# A note on the "critical region" in our callback handler.
|
||||
# We want to avoid stacking callback handlers due to events occurring
|
||||
# during handling of the last event. To do this, we keep events disabled
|
||||
# until we've done all processing. HOWEVER, we must enable events before
|
||||
# popping the stack frame (can't be done atomically) and so it would still
|
||||
# be possible to get enough handler activations to overflow the stack.
|
||||
# Although unlikely, bugs of that kind are hard to track down, so we'd
|
||||
# like to avoid the possibility.
|
||||
# So, on entry to the handler we detect whether we interrupted an
|
||||
# existing activation in its critical region -- if so, we pop the current
|
||||
# activation and restart the handler using the previous one.
|
||||
*/
|
||||
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
|
||||
CFI_STARTPROC
|
||||
	/* Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
	   see the correct pointer to the pt_regs */
|
||||
movq %rdi, %rsp # we don't return, adjust the stack frame
|
||||
CFI_ENDPROC
|
||||
CFI_DEFAULT_STACK
|
||||
11: incl %gs:pda_irqcount
|
||||
movq %rsp,%rbp
|
||||
CFI_DEF_CFA_REGISTER rbp
|
||||
cmovzq %gs:pda_irqstackptr,%rsp
|
||||
pushq %rbp # backlink for old unwinder
|
||||
call xen_evtchn_do_upcall
|
||||
popq %rsp
|
||||
CFI_DEF_CFA_REGISTER rsp
|
||||
decl %gs:pda_irqcount
|
||||
jmp error_exit
|
||||
CFI_ENDPROC
|
||||
END(do_hypervisor_callback)
|
||||
|
||||
/*
|
||||
# Hypervisor uses this for application faults while it executes.
|
||||
# We get here for two reasons:
|
||||
# 1. Fault while reloading DS, ES, FS or GS
|
||||
# 2. Fault while executing IRET
|
||||
# Category 1 we do not need to fix up as Xen has already reloaded all segment
|
||||
# registers that could be reloaded and zeroed the others.
|
||||
# Category 2 we fix up by killing the current process. We cannot use the
|
||||
# normal Linux return path in this case because if we use the IRET hypercall
|
||||
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
|
||||
# We distinguish between categories by comparing each saved segment register
|
||||
# with its current contents: any discrepancy means we are in category 1.
|
||||
*/
|
||||
ENTRY(xen_failsafe_callback)
|
||||
framesz = (RIP-0x30) /* workaround buggy gas */
|
||||
_frame framesz
|
||||
CFI_REL_OFFSET rcx, 0
|
||||
CFI_REL_OFFSET r11, 8
|
||||
movw %ds,%cx
|
||||
cmpw %cx,0x10(%rsp)
|
||||
CFI_REMEMBER_STATE
|
||||
jne 1f
|
||||
movw %es,%cx
|
||||
cmpw %cx,0x18(%rsp)
|
||||
jne 1f
|
||||
movw %fs,%cx
|
||||
cmpw %cx,0x20(%rsp)
|
||||
jne 1f
|
||||
movw %gs,%cx
|
||||
cmpw %cx,0x28(%rsp)
|
||||
jne 1f
|
||||
/* All segments match their saved values => Category 2 (Bad IRET). */
|
||||
movq (%rsp),%rcx
|
||||
CFI_RESTORE rcx
|
||||
movq 8(%rsp),%r11
|
||||
CFI_RESTORE r11
|
||||
addq $0x30,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET -0x30
|
||||
pushq $0
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
pushq %r11
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
pushq %rcx
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
jmp general_protection
|
||||
CFI_RESTORE_STATE
|
||||
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
|
||||
movq (%rsp),%rcx
|
||||
CFI_RESTORE rcx
|
||||
movq 8(%rsp),%r11
|
||||
CFI_RESTORE r11
|
||||
addq $0x30,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET -0x30
|
||||
pushq $0
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
SAVE_ALL
|
||||
jmp error_exit
|
||||
CFI_ENDPROC
|
||||
END(xen_failsafe_callback)
|
||||
|
||||
#endif /* CONFIG_XEN */
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/uv/uv_mmrs.h>
|
||||
#include <asm/uv/uv_hub.h>
|
||||
#include <asm/uv/bios.h>
|
||||
|
||||
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
|
||||
|
@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
|
|||
short uv_possible_blades;
|
||||
EXPORT_SYMBOL_GPL(uv_possible_blades);
|
||||
|
||||
unsigned long sn_rtc_cycles_per_second;
|
||||
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
|
||||
|
||||
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
|
||||
|
||||
static cpumask_t uv_target_cpus(void)
|
||||
|
@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode)
|
|||
map_high("MMIOH", mmioh.s.base, shift, map_uc);
|
||||
}
|
||||
|
||||
static __init void uv_rtc_init(void)
{
	long status, ticks_per_sec, drift;

	status =
	    x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
					&drift);
	if (status != 0 || ticks_per_sec < 100000) {
		printk(KERN_WARNING
			"unable to determine platform RTC clock frequency, "
			"guessing.\n");
		/* BIOS gives wrong value for clock freq. so guess */
		sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
	} else
		sn_rtc_cycles_per_second = ticks_per_sec;
}
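For reference, the fallback value works out as follows (plain arithmetic, nothing assumed beyond the constants above):

	/* 1,000,000,000,000 / 30,000 = 33,333,333 cycles/sec, ~33.3 MHz guess */
	sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;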
|
||||
|
||||
static __init void uv_system_init(void)
|
||||
{
|
||||
union uvh_si_addr_map_config_u m_n_config;
|
||||
|
@ -326,6 +347,8 @@ static __init void uv_system_init(void)
|
|||
gnode_upper = (((unsigned long)node_id.s.node_id) &
|
||||
~((1 << n_val) - 1)) << m_val;
|
||||
|
||||
uv_rtc_init();
|
||||
|
||||
for_each_present_cpu(cpu) {
|
||||
nid = cpu_to_node(cpu);
|
||||
pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
|
||||
|
|
|
@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
|
|||
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
|
||||
#endif
|
||||
|
||||
void __init x86_64_init_pda(void)
|
||||
{
|
||||
_cpu_pda = __cpu_pda;
|
||||
cpu_pda(0) = &_boot_cpu_pda;
|
||||
pda_init(0);
|
||||
}
|
||||
|
||||
static void __init zap_identity_mappings(void)
|
||||
{
|
||||
pgd_t *pgd = pgd_offset_k(0UL);
|
||||
|
@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
|
|||
|
||||
early_printk("Kernel alive\n");
|
||||
|
||||
_cpu_pda = __cpu_pda;
|
||||
cpu_pda(0) = &_boot_cpu_pda;
|
||||
pda_init(0);
|
||||
x86_64_init_pda();
|
||||
|
||||
early_printk("Kernel really alive\n");
|
||||
|
||||
|
|
|
@ -407,6 +407,7 @@ ENTRY(phys_base)
|
|||
/* This must match the first entry in level2_kernel_pgt */
|
||||
.quad 0x0000000000000000
|
||||
|
||||
#include "../../x86/xen/xen-head.S"
|
||||
|
||||
.section .bss, "aw", @nobits
|
||||
.align L1_CACHE_BYTES
|
||||
|
|
|
@ -756,7 +756,7 @@ void send_IPI_self(int vector)
|
|||
/*
|
||||
* Send the IPI. The write to APIC_ICR fires this off.
|
||||
*/
|
||||
apic_write_around(APIC_ICR, cfg);
|
||||
apic_write(APIC_ICR, cfg);
|
||||
}
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
|
@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq)
|
|||
unsigned long v;
|
||||
|
||||
v = apic_read(APIC_LVT0);
|
||||
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
static void unmask_lapic_irq(unsigned int irq)
|
||||
|
@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq)
|
|||
unsigned long v;
|
||||
|
||||
v = apic_read(APIC_LVT0);
|
||||
apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
static struct irq_chip lapic_chip __read_mostly = {
|
||||
|
@ -2168,7 +2168,7 @@ static inline void __init check_timer(void)
|
|||
* The AEOI mode will finish them in the 8259A
|
||||
* automatically.
|
||||
*/
|
||||
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
||||
init_8259A(1);
|
||||
timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
|
||||
|
||||
|
@ -2177,8 +2177,9 @@ static inline void __init check_timer(void)
|
|||
pin2 = ioapic_i8259.pin;
|
||||
apic2 = ioapic_i8259.apic;
|
||||
|
||||
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
||||
vector, apic1, pin1, apic2, pin2);
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
||||
"apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
||||
vector, apic1, pin1, apic2, pin2);
|
||||
|
||||
/*
|
||||
* Some BIOS writers are clueless and report the ExtINTA
|
||||
|
@ -2216,12 +2217,13 @@ static inline void __init check_timer(void)
|
|||
}
|
||||
clear_IO_APIC_pin(apic1, pin1);
|
||||
if (!no_pin1)
|
||||
printk(KERN_ERR "..MP-BIOS bug: "
|
||||
"8254 timer not connected to IO-APIC\n");
|
||||
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
||||
"8254 timer not connected to IO-APIC\n");
|
||||
|
||||
printk(KERN_INFO "...trying to set up timer (IRQ0) "
|
||||
"through the 8259A ... ");
|
||||
printk("\n..... (found pin %d) ...", pin2);
|
||||
apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
||||
"(IRQ0) through the 8259A ...\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"..... (found apic %d pin %d) ...\n", apic2, pin2);
|
||||
/*
|
||||
* legacy devices should be connected to IO APIC #0
|
||||
*/
|
||||
|
@ -2230,7 +2232,7 @@ static inline void __init check_timer(void)
|
|||
unmask_IO_APIC_irq(0);
|
||||
enable_8259A_irq(0);
|
||||
if (timer_irq_works()) {
|
||||
printk("works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
||||
timer_through_8259 = 1;
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
disable_8259A_irq(0);
|
||||
|
@ -2244,44 +2246,47 @@ static inline void __init check_timer(void)
|
|||
*/
|
||||
disable_8259A_irq(0);
|
||||
clear_IO_APIC_pin(apic2, pin2);
|
||||
printk(" failed.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
||||
}
|
||||
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
|
||||
apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
||||
"through the IO-APIC - disabling NMI Watchdog!\n");
|
||||
nmi_watchdog = NMI_NONE;
|
||||
}
|
||||
timer_ack = 0;
|
||||
|
||||
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"...trying to set up timer as Virtual Wire IRQ...\n");
|
||||
|
||||
lapic_register_intr(0, vector);
|
||||
apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
|
||||
apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
|
||||
enable_8259A_irq(0);
|
||||
|
||||
if (timer_irq_works()) {
|
||||
printk(" works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
||||
goto out;
|
||||
}
|
||||
disable_8259A_irq(0);
|
||||
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
|
||||
printk(" failed.\n");
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
||||
|
||||
printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"...trying to set up timer as ExtINT IRQ...\n");
|
||||
|
||||
init_8259A(0);
|
||||
make_8259A_irq(0);
|
||||
apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
|
||||
apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
||||
|
||||
unlock_ExtINT_logic();
|
||||
|
||||
if (timer_irq_works()) {
|
||||
printk(" works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
||||
goto out;
|
||||
}
|
||||
printk(" failed :(.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
||||
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
||||
"report. Then try booting with the 'noapic' option");
|
||||
"report. Then try booting with the 'noapic' option.\n");
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
|
|
@ -45,6 +45,7 @@
|
|||
#include <asm/proto.h>
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/i8259.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/msidef.h>
|
||||
#include <asm/hypertransport.h>
|
||||
|
@ -1696,8 +1697,9 @@ static inline void __init check_timer(void)
|
|||
pin2 = ioapic_i8259.pin;
|
||||
apic2 = ioapic_i8259.apic;
|
||||
|
||||
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
||||
cfg->vector, apic1, pin1, apic2, pin2);
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
||||
"apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
||||
cfg->vector, apic1, pin1, apic2, pin2);
|
||||
|
||||
/*
|
||||
* Some BIOS writers are clueless and report the ExtINTA
|
||||
|
@ -1735,14 +1737,13 @@ static inline void __init check_timer(void)
|
|||
}
|
||||
clear_IO_APIC_pin(apic1, pin1);
|
||||
if (!no_pin1)
|
||||
apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
|
||||
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
||||
"8254 timer not connected to IO-APIC\n");
|
||||
|
||||
apic_printk(APIC_VERBOSE,KERN_INFO
|
||||
"...trying to set up timer (IRQ0) "
|
||||
"through the 8259A ... ");
|
||||
apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
|
||||
apic2, pin2);
|
||||
apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
||||
"(IRQ0) through the 8259A ...\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"..... (found apic %d pin %d) ...\n", apic2, pin2);
|
||||
/*
|
||||
* legacy devices should be connected to IO APIC #0
|
||||
*/
|
||||
|
@ -1751,7 +1752,7 @@ static inline void __init check_timer(void)
|
|||
unmask_IO_APIC_irq(0);
|
||||
enable_8259A_irq(0);
|
||||
if (timer_irq_works()) {
|
||||
apic_printk(APIC_VERBOSE," works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
||||
timer_through_8259 = 1;
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
disable_8259A_irq(0);
|
||||
|
@ -1765,29 +1766,32 @@ static inline void __init check_timer(void)
|
|||
*/
|
||||
disable_8259A_irq(0);
|
||||
clear_IO_APIC_pin(apic2, pin2);
|
||||
apic_printk(APIC_VERBOSE," failed.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
||||
}
|
||||
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
|
||||
apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
||||
"through the IO-APIC - disabling NMI Watchdog!\n");
|
||||
nmi_watchdog = NMI_NONE;
|
||||
}
|
||||
|
||||
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"...trying to set up timer as Virtual Wire IRQ...\n");
|
||||
|
||||
lapic_register_intr(0);
|
||||
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
||||
enable_8259A_irq(0);
|
||||
|
||||
if (timer_irq_works()) {
|
||||
apic_printk(APIC_VERBOSE," works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
||||
goto out;
|
||||
}
|
||||
disable_8259A_irq(0);
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
||||
apic_printk(APIC_VERBOSE," failed.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
||||
|
||||
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
|
||||
apic_printk(APIC_QUIET, KERN_INFO
|
||||
"...trying to set up timer as ExtINT IRQ...\n");
|
||||
|
||||
init_8259A(0);
|
||||
make_8259A_irq(0);
|
||||
|
@ -1796,11 +1800,12 @@ static inline void __init check_timer(void)
|
|||
unlock_ExtINT_logic();
|
||||
|
||||
if (timer_irq_works()) {
|
||||
apic_printk(APIC_VERBOSE," works.\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
||||
goto out;
|
||||
}
|
||||
apic_printk(APIC_VERBOSE," failed :(.\n");
|
||||
panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
|
||||
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
||||
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
||||
"report. Then try booting with the 'noapic' option.\n");
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
|
|
@ -103,6 +103,9 @@ void __init io_delay_init(void)
|
|||
|
||||
static int __init io_delay_param(char *s)
|
||||
{
|
||||
if (!s)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(s, "0x80"))
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
|
||||
else if (!strcmp(s, "0xed"))
|
||||
|
|
|
@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
|
|||
/*
|
||||
* Send the IPI. The write to APIC_ICR fires this off.
|
||||
*/
|
||||
apic_write_around(APIC_ICR, cfg);
|
||||
apic_write(APIC_ICR, cfg);
|
||||
}
|
||||
|
||||
void send_IPI_self(int vector)
|
||||
|
@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
|
|||
* prepare target chip field
|
||||
*/
|
||||
cfg = __prepare_ICR2(mask);
|
||||
apic_write_around(APIC_ICR2, cfg);
|
||||
apic_write(APIC_ICR2, cfg);
|
||||
|
||||
/*
|
||||
* program the ICR
|
||||
|
@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
|
|||
/*
|
||||
* Send the IPI. The write to APIC_ICR fires this off.
|
||||
*/
|
||||
apic_write_around(APIC_ICR, cfg);
|
||||
apic_write(APIC_ICR, cfg);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -83,11 +83,8 @@ union irq_ctx {
|
|||
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
|
||||
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
|
||||
|
||||
static char softirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
|
||||
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
|
||||
static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
|
||||
|
||||
static void call_on_stack(void *func, void *stack)
|
||||
{
|
||||
|
|
|
@ -12,9 +12,13 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/setup.h>
|
||||
|
||||
struct dentry *arch_debugfs_dir;
|
||||
EXPORT_SYMBOL(arch_debugfs_dir);
|
||||
|
||||
#ifdef CONFIG_DEBUG_BOOT_PARAMS
|
||||
struct setup_data_node {
|
||||
u64 paddr;
|
||||
|
@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void)
|
|||
{
|
||||
int error = 0;
|
||||
|
||||
arch_debugfs_dir = debugfs_create_dir("x86", NULL);
|
||||
if (!arch_debugfs_dir)
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_DEBUG_BOOT_PARAMS
|
||||
error = boot_params_kdebugfs_init();
|
||||
#endif
|
||||
|
|
|
@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
|
|||
|
||||
resume_execution(cur, regs, kcb);
|
||||
regs->flags |= kcb->kprobe_saved_flags;
|
||||
trace_hardirqs_fixup_flags(regs->flags);
|
||||
|
||||
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
|
||||
kcb->kprobe_status = KPROBE_HIT_SSDONE;
|
||||
|
|
|
@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|||
const Elf_Shdr *sechdrs,
|
||||
struct module *me)
|
||||
{
|
||||
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
|
||||
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
|
||||
*para = NULL;
|
||||
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
|
||||
|
||||
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
|
||||
|
@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|||
alt = s;
|
||||
if (!strcmp(".smp_locks", secstrings + s->sh_name))
|
||||
locks= s;
|
||||
if (!strcmp(".parainstructions", secstrings + s->sh_name))
|
||||
para = s;
|
||||
}
|
||||
|
||||
if (alt) {
|
||||
|
@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|||
tseg, tseg + text->sh_size);
|
||||
}
|
||||
|
||||
	if (para) {
		void *pseg = (void *)para->sh_addr;
		apply_paravirt(pseg, pseg + para->sh_size);
	}
|
||||
|
||||
return module_bug_finalize(hdr, sechdrs, me);
|
||||
}
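The pattern used throughout module_finalize() is a single pass over the section headers, dispatching on section name; condensed here for reference only, using the same variables and calls shown above.

	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
		const char *name = secstrings + s->sh_name;

		if (!strcmp(".text", name))
			text = s;
		else if (!strcmp(".altinstructions", name))
			alt = s;
		else if (!strcmp(".smp_locks", name))
			locks = s;
		else if (!strcmp(".parainstructions", name))
			para = s;
	}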
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <asm/bios_ebda.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/trampoline.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include <mach_apic.h>
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
|
|||
return sum & 0xFF;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
int found_numaq;
|
||||
/*
|
||||
* Have to match translation table entries to main table entries by counter
|
||||
* hence the mpc_record variable .... can't see a less disgusting way of
|
||||
* doing this ....
|
||||
*/
|
||||
struct mpc_config_translation {
|
||||
unsigned char mpc_type;
|
||||
unsigned char trans_len;
|
||||
unsigned char trans_type;
|
||||
unsigned char trans_quad;
|
||||
unsigned char trans_global;
|
||||
unsigned char trans_local;
|
||||
unsigned short trans_reserved;
|
||||
};
|
||||
|
||||
|
||||
static int mpc_record;
|
||||
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
|
||||
__cpuinitdata;
|
||||
|
||||
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}
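A quick worked example of the NUMA-Q logical APIC ID layout above; the values are picked purely for illustration.

	/* quad 1, CPU with physical APIC id 3 */
	generate_logical_apicid(1, 3);	/* == (1 << 4) + (3 << 1) == 0x16 */
	generate_logical_apicid(1, 0);	/* == (1 << 4) + 1        == 0x11 */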
|
||||
|
||||
|
||||
static inline int mpc_apic_id(struct mpc_config_processor *m,
|
||||
struct mpc_config_translation *translation_record)
|
||||
{
|
||||
int quad = translation_record->trans_quad;
|
||||
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
|
||||
|
||||
printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
|
||||
m->mpc_apicid,
|
||||
(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
|
||||
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
|
||||
m->mpc_apicver, quad, logical_apicid);
|
||||
return logical_apicid;
|
||||
}
|
||||
|
||||
int mp_bus_id_to_node[MAX_MP_BUSSES];
|
||||
|
||||
int mp_bus_id_to_local[MAX_MP_BUSSES];

static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;

mp_bus_id_to_node[m->mpc_busid] = quad;
mp_bus_id_to_local[m->mpc_busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->mpc_busid, name, quad);
}

int quad_local_to_mp_bus_id [NR_CPUS/4][4];
static void mpc_oem_pci_bus(struct mpc_config_bus *m,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;

quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
}

#endif

static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
{
int apicid;
|
@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
|
|||
disabled_cpus++;
return;
}
#ifdef CONFIG_X86_NUMAQ
if (found_numaq)
apicid = mpc_apic_id(m, translation_table[mpc_record]);

if (x86_quirks->mpc_apic_id)
apicid = x86_quirks->mpc_apic_id(m);
else
apicid = m->mpc_apicid;
#else
apicid = m->mpc_apicid;
#endif

if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
boot_cpu_physical_apicid = m->mpc_apicid;
|
@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
|
|||
memcpy(str, m->mpc_bustype, 6);
str[6] = 0;

#ifdef CONFIG_X86_NUMAQ
if (found_numaq)
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
#else
printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
#endif
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);

#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
|
@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
|
|||
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
#ifdef CONFIG_X86_NUMAQ
if (found_numaq)
mpc_oem_pci_bus(m, translation_table[mpc_record]);
#endif
if (x86_quirks->mpc_oem_pci_bus)
x86_quirks->mpc_oem_pci_bus(m);

clear_bit(m->mpc_busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
|
@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
|
|||
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
}

#ifdef CONFIG_X86_NUMAQ
static void __init MP_translation_info(struct mpc_config_translation *m)
{
printk(KERN_INFO
"Translation: record %d, type %d, quad %d, global %d, local %d\n",
mpc_record, m->trans_type, m->trans_quad, m->trans_global,
m->trans_local);

if (mpc_record >= MAX_MPC_ENTRY)
printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
translation_table[mpc_record] = m; /* stash this for later */
if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
node_set_online(m->trans_quad);
}

/*
* Read/parse the MPC oem tables
*/

static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
unsigned short oemsize)
{
int count = sizeof(*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;

mpc_record = 0;
printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
oemtable);
if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
printk(KERN_WARNING
"SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->oem_signature[0], oemtable->oem_signature[1],
oemtable->oem_signature[2], oemtable->oem_signature[3]);
return;
}
if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
switch (*oemptr) {
case MP_TRANSLATION:
{
struct mpc_config_translation *m =
(struct mpc_config_translation *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
default:
{
printk(KERN_WARNING
"Unrecognised OEM table entry type! - %d\n",
(int)*oemptr);
return;
}
}
}
}

void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (strncmp(oem, "IBM NUMA", 8))
printk("Warning! Not a NUMA-Q system!\n");
else
found_numaq = 1;

if (mpc->mpc_oemptr)
smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
mpc->mpc_oemsize);
}
#endif /* CONFIG_X86_NUMAQ */

/*
* Read/parse the MPC
*/
|
@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|||
} else
mps_oem_check(mpc, oem, str);
#endif

/* save the local APIC address, it might be non-default */
if (!acpi_lapic)
mp_lapic_addr = mpc->mpc_lapic;
|
@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|||
if (early)
return 1;

if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
}

/*
* Now process the configuration blocks.
*/
#ifdef CONFIG_X86_NUMAQ
mpc_record = 0;
#endif
if (x86_quirks->mpc_record)
*x86_quirks->mpc_record = 0;

while (count < mpc->mpc_length) {
switch (*mpt) {
case MP_PROCESSOR:
|
@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|||
count = mpc->mpc_length;
break;
}
#ifdef CONFIG_X86_NUMAQ
++mpc_record;
#endif
if (x86_quirks->mpc_record)
(*x86_quirks->mpc_record)++;
}

#ifdef CONFIG_X86_GENERICARCH
|
@ -725,12 +577,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
|
|||

static struct intel_mp_floating *mpf_found;

/*
* Machine specific quirk for finding the SMP config before other setup
* activities destroy the table:
*/
int (*mach_get_smp_config_quirk)(unsigned int early);

/*
* Scan the memory blocks for an SMP configuration block.
*/
|
@ -738,8 +584,8 @@ static void __init __get_smp_config(unsigned int early)
|
|||
{
struct intel_mp_floating *mpf = mpf_found;

if (mach_get_smp_config_quirk) {
if (mach_get_smp_config_quirk(early))
if (x86_quirks->mach_get_smp_config) {
if (x86_quirks->mach_get_smp_config(early))
return;
}
if (acpi_lapic && early)
|
@ -899,14 +745,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
|
|||
return 0;
}

int (*mach_find_smp_config_quirk)(unsigned int reserve);

static void __init __find_smp_config(unsigned int reserve)
{
unsigned int address;

if (mach_find_smp_config_quirk) {
if (mach_find_smp_config_quirk(reserve))
if (x86_quirks->mach_find_smp_config) {
if (x86_quirks->mach_find_smp_config(reserve))
return;
}
/*
|
|
|
@ -263,7 +263,7 @@ late_initcall(init_lapic_nmi_sysfs);
|
|||

static void __acpi_nmi_enable(void *__unused)
{
apic_write_around(APIC_LVT0, APIC_DM_NMI);
apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
|
@ -277,7 +277,7 @@ void acpi_nmi_enable(void)
|
|||

static void __acpi_nmi_disable(void *__unused)
{
apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
|
@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
|
|||

#ifdef CONFIG_SYSCTL

static int __init setup_unknown_nmi_panic(char *str)
{
unknown_nmi_panic = 1;
return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
unsigned char reason = get_nmi_reason();
|
|
|
@ -33,6 +33,7 @@
|
|||
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/e820.h>
#include <asm/setup.h>

#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))

|
@ -71,27 +72,6 @@ static void __init smp_dump_qct(void)
|
|||
}
}

static __init void early_check_numaq(void)
{
/*
* Find possible boot-time SMP configuration:
*/
early_find_smp_config();
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();
}

int __init get_memcfg_numaq(void)
{
early_check_numaq();
if (!found_numaq)
return 0;
smp_dump_qct();
return 1;
}

void __init numaq_tsc_disable(void)
{
|
@ -103,3 +83,198 @@ void __init numaq_tsc_disable(void)
|
|||
setup_clear_cpu_cap(X86_FEATURE_TSC);
}
}

static int __init numaq_pre_time_init(void)
{
numaq_tsc_disable();
return 0;
}

int found_numaq;
/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
* doing this ....
*/
struct mpc_config_translation {
unsigned char mpc_type;
unsigned char trans_len;
unsigned char trans_type;
unsigned char trans_quad;
unsigned char trans_global;
unsigned char trans_local;
unsigned short trans_reserved;
};

/* x86_quirks member */
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
__cpuinitdata;

static inline int generate_logical_apicid(int quad, int phys_apicid)
{
return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}

/* x86_quirks member */
static int mpc_apic_id(struct mpc_config_processor *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);

printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
m->mpc_apicid,
(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
m->mpc_apicver, quad, logical_apicid);
return logical_apicid;
}

int mp_bus_id_to_node[MAX_MP_BUSSES];

int mp_bus_id_to_local[MAX_MP_BUSSES];

/* x86_quirks member */
static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;

mp_bus_id_to_node[m->mpc_busid] = quad;
mp_bus_id_to_local[m->mpc_busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->mpc_busid, name, quad);
}

int quad_local_to_mp_bus_id [NR_CPUS/4][4];

/* x86_quirks member */
static void mpc_oem_pci_bus(struct mpc_config_bus *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;

quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
}

static void __init MP_translation_info(struct mpc_config_translation *m)
{
printk(KERN_INFO
"Translation: record %d, type %d, quad %d, global %d, local %d\n",
mpc_record, m->trans_type, m->trans_quad, m->trans_global,
m->trans_local);

if (mpc_record >= MAX_MPC_ENTRY)
printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
translation_table[mpc_record] = m; /* stash this for later */
if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
node_set_online(m->trans_quad);
}

static int __init mpf_checksum(unsigned char *mp, int len)
{
int sum = 0;

while (len--)
sum += *mp++;

return sum & 0xFF;
}

/*
* Read/parse the MPC oem tables
*/

static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
unsigned short oemsize)
{
int count = sizeof(*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;

mpc_record = 0;
printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
oemtable);
if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
printk(KERN_WARNING
"SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->oem_signature[0], oemtable->oem_signature[1],
oemtable->oem_signature[2], oemtable->oem_signature[3]);
return;
}
if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
switch (*oemptr) {
case MP_TRANSLATION:
{
struct mpc_config_translation *m =
(struct mpc_config_translation *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
default:
{
printk(KERN_WARNING
"Unrecognised OEM table entry type! - %d\n",
(int)*oemptr);
return;
}
}
}
}

static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
.arch_pre_intr_init = NULL,
.arch_memory_setup = NULL,
.arch_intr_init = NULL,
.arch_trap_init = NULL,
.mach_get_smp_config = NULL,
.mach_find_smp_config = NULL,
.mpc_record = &mpc_record,
.mpc_apic_id = mpc_apic_id,
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
};

void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (strncmp(oem, "IBM NUMA", 8))
printk("Warning! Not a NUMA-Q system!\n");
else
found_numaq = 1;
}

static __init void early_check_numaq(void)
{
/*
* Find possible boot-time SMP configuration:
*/
early_find_smp_config();
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();

if (found_numaq)
x86_quirks = &numaq_x86_quirks;
}

int __init get_memcfg_numaq(void)
{
early_check_numaq();
if (!found_numaq)
return 0;
smp_dump_qct();
return 1;
}
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <asm/desc.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/arch_hooks.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/time.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/irq.h>
|
||||
|
@ -361,7 +362,6 @@ struct pv_cpu_ops pv_cpu_ops = {
|
|||
struct pv_apic_ops pv_apic_ops = {
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
.apic_write = native_apic_write,
|
||||
.apic_write_atomic = native_apic_write_atomic,
|
||||
.apic_read = native_apic_read,
|
||||
.setup_boot_clock = setup_boot_APIC_clock,
|
||||
.setup_secondary_clock = setup_secondary_APIC_clock,
|
||||
|
@ -373,6 +373,9 @@ struct pv_mmu_ops pv_mmu_ops = {
|
|||
#ifndef CONFIG_X86_64
|
||||
.pagetable_setup_start = native_pagetable_setup_start,
|
||||
.pagetable_setup_done = native_pagetable_setup_done,
|
||||
#else
|
||||
.pagetable_setup_start = paravirt_nop,
|
||||
.pagetable_setup_done = paravirt_nop,
|
||||
#endif
|
||||
|
||||
.read_cr2 = native_read_cr2,
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
#include <linux/delay.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/iommu-helper.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/calgary.h>
|
||||
#include <asm/tce.h>
|
||||
#include <asm/pci-direct.h>
|
||||
|
|
|
@ -5,12 +5,11 @@
|
|||
|
||||
#include <asm/proto.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/calgary.h>
|
||||
#include <asm/amd_iommu.h>
|
||||
|
||||
int forbid_dac __read_mostly;
|
||||
EXPORT_SYMBOL(forbid_dac);
|
||||
static int forbid_dac __read_mostly;
|
||||
|
||||
const struct dma_mapping_ops *dma_ops;
|
||||
EXPORT_SYMBOL(dma_ops);
|
||||
|
@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void)
|
|||
* The order of these functions is important for
|
||||
* fall-back/fail-over reasons
|
||||
*/
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
gart_iommu_hole_init();
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CALGARY_IOMMU
|
||||
detect_calgary();
|
||||
#endif
|
||||
|
||||
detect_intel_iommu();
|
||||
|
||||
amd_iommu_detect();
|
||||
|
||||
#ifdef CONFIG_SWIOTLB
|
||||
pci_swiotlb_init();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -184,9 +177,7 @@ static __init int iommu_setup(char *p)
|
|||
swiotlb = 1;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
gart_parse_options(p);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CALGARY_IOMMU
|
||||
if (!strncmp(p, "calgary", 7))
|
||||
|
@ -500,17 +491,13 @@ EXPORT_SYMBOL(dma_free_coherent);
|
|||
|
||||
static int __init pci_iommu_init(void)
|
||||
{
|
||||
#ifdef CONFIG_CALGARY_IOMMU
|
||||
calgary_iommu_init();
|
||||
#endif
|
||||
|
||||
intel_iommu_init();
|
||||
|
||||
amd_iommu_init();
|
||||
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
gart_iommu_init();
|
||||
#endif
|
||||
|
||||
no_iommu_init();
|
||||
return 0;
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <asm/mtrr.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/swiotlb.h>
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <linux/dma-mapping.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/dma.h>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/swiotlb.h>
|
||||
#include <asm/dma.h>
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ unsigned long idle_nomwait;
|
|||
EXPORT_SYMBOL(idle_nomwait);
|
||||
|
||||
struct kmem_cache *task_xstate_cachep;
|
||||
static int force_mwait __cpuinitdata;
|
||||
|
||||
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
||||
{
|
||||
|
@ -199,6 +200,7 @@ static void poll_idle(void)
|
|||
*
|
||||
* idle=mwait overrides this decision and forces the usage of mwait.
|
||||
*/
|
||||
static int __cpuinitdata force_mwait;
|
||||
|
||||
#define MWAIT_INFO 0x05
|
||||
#define MWAIT_ECX_EXTENDED_INFO 0x01
|
||||
|
@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
|
|||
|
||||
static int __init idle_setup(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(str, "poll")) {
|
||||
printk("using polling idle threads.\n");
|
||||
pm_idle = poll_idle;
|
||||
|
|
|
@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
|
|||
struct task_struct *
|
||||
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
{
|
||||
struct thread_struct *prev = &prev_p->thread,
|
||||
*next = &next_p->thread;
|
||||
struct thread_struct *prev = &prev_p->thread;
|
||||
struct thread_struct *next = &next_p->thread;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
unsigned fsindex, gsindex;
|
||||
|
@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
|
||||
/*
|
||||
* Switch FS and GS.
|
||||
*
|
||||
* Segment register != 0 always requires a reload. Also
|
||||
* reload when it has changed. When prev process used 64bit
|
||||
* base always reload to avoid an information leak.
|
||||
*/
|
||||
{
|
||||
/* segment register != 0 always requires a reload.
|
||||
also reload when it has changed.
|
||||
when prev process used 64bit base always reload
|
||||
to avoid an information leak. */
|
||||
if (unlikely(fsindex | next->fsindex | prev->fs)) {
|
||||
loadsegment(fs, next->fsindex);
|
||||
/* check if the user used a selector != 0
|
||||
* if yes clear 64bit base, since overloaded base
|
||||
* is always mapped to the Null selector
|
||||
*/
|
||||
if (fsindex)
|
||||
if (unlikely(fsindex | next->fsindex | prev->fs)) {
|
||||
loadsegment(fs, next->fsindex);
|
||||
/*
|
||||
* Check if the user used a selector != 0; if yes
|
||||
* clear 64bit base, since overloaded base is always
|
||||
* mapped to the Null selector
|
||||
*/
|
||||
if (fsindex)
|
||||
prev->fs = 0;
|
||||
}
|
||||
/* when next process has a 64bit base use it */
|
||||
if (next->fs)
|
||||
wrmsrl(MSR_FS_BASE, next->fs);
|
||||
prev->fsindex = fsindex;
|
||||
|
||||
if (unlikely(gsindex | next->gsindex | prev->gs)) {
|
||||
load_gs_index(next->gsindex);
|
||||
if (gsindex)
|
||||
prev->gs = 0;
|
||||
}
|
||||
if (next->gs)
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
|
||||
prev->gsindex = gsindex;
|
||||
}
|
||||
/* when next process has a 64bit base use it */
|
||||
if (next->fs)
|
||||
wrmsrl(MSR_FS_BASE, next->fs);
|
||||
prev->fsindex = fsindex;
|
||||
|
||||
if (unlikely(gsindex | next->gsindex | prev->gs)) {
|
||||
load_gs_index(next->gsindex);
|
||||
if (gsindex)
|
||||
prev->gs = 0;
|
||||
}
|
||||
if (next->gs)
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
|
||||
prev->gsindex = gsindex;
|
||||
|
||||
/* Must be after DS reload */
|
||||
unlazy_fpu(prev_p);
|
||||
|
@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
write_pda(pcurrent, next_p);
|
||||
|
||||
write_pda(kernelstack,
|
||||
(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE - PDA_STACKOFFSET);
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
write_pda(stack_canary, next_p->stack_canary);
|
||||
/*
|
||||
|
|
|
@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
||||
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
|
|||
force_sig_info(SIGTRAP, &info, tsk);
|
||||
}
|
||||
|
||||
/* notification of system call entry/exit
|
||||
* - triggered by current->work.syscall_trace
|
||||
*/
|
||||
int do_syscall_trace(struct pt_regs *regs, int entryexit)
|
||||
{
|
||||
int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
|
||||
/*
|
||||
* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
|
||||
* interception
|
||||
*/
|
||||
int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
|
||||
int ret = 0;
|
||||
|
||||
/* do the secure computing check first */
|
||||
if (!entryexit)
|
||||
secure_computing(regs->orig_ax);
|
||||
|
||||
if (unlikely(current->audit_context)) {
|
||||
if (entryexit)
|
||||
audit_syscall_exit(AUDITSC_RESULT(regs->ax),
|
||||
regs->ax);
|
||||
/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
|
||||
* on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
|
||||
* not used, entry.S will call us only on syscall exit, not
|
||||
* entry; so when TIF_SYSCALL_AUDIT is used we must avoid
|
||||
* calling send_sigtrap() on syscall entry.
|
||||
*
|
||||
* Note that when PTRACE_SYSEMU_SINGLESTEP is used,
|
||||
* is_singlestep is false, despite his name, so we will still do
|
||||
* the correct thing.
|
||||
*/
|
||||
else if (is_singlestep)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(current->ptrace & PT_PTRACED))
|
||||
goto out;
|
||||
|
||||
/* If a process stops on the 1st tracepoint with SYSCALL_TRACE
|
||||
* and then is resumed with SYSEMU_SINGLESTEP, it will come in
|
||||
* here. We have to check this and return */
|
||||
if (is_sysemu && entryexit)
|
||||
return 0;
|
||||
|
||||
/* Fake a debug trap */
|
||||
if (is_singlestep)
|
||||
send_sigtrap(current, regs, 0);
|
||||
|
||||
if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
|
||||
goto out;
|
||||
|
||||
/* the 0x80 provides a way for the tracing parent to distinguish
|
||||
between a syscall stop and SIGTRAP delivery */
|
||||
/* Note that the debugger could change the result of test_thread_flag!*/
|
||||
ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
|
||||
|
||||
/*
|
||||
* this isn't the same as continuing with a signal, but it will do
|
||||
* for normal use. strace only continues with a signal if the
|
||||
* stopping signal is not SIGTRAP. -brl
|
||||
*/
|
||||
if (current->exit_code) {
|
||||
send_sig(current->exit_code, current, 1);
|
||||
current->exit_code = 0;
|
||||
}
|
||||
ret = is_sysemu;
|
||||
out:
|
||||
if (unlikely(current->audit_context) && !entryexit)
|
||||
audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
|
||||
regs->bx, regs->cx, regs->dx, regs->si);
|
||||
if (ret == 0)
|
||||
return 0;
|
||||
|
||||
regs->orig_ax = -1; /* force skip of syscall restarting */
|
||||
if (unlikely(current->audit_context))
|
||||
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
static void syscall_trace(struct pt_regs *regs)
|
||||
{
|
||||
if (!(current->ptrace & PT_PTRACED))
|
||||
return;
|
||||
|
||||
#if 0
|
||||
printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
|
||||
|
@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs)
|
|||
}
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace_enter(struct pt_regs *regs)
|
||||
#ifdef CONFIG_X86_32
|
||||
# define IS_IA32 1
|
||||
#elif defined CONFIG_IA32_EMULATION
|
||||
# define IS_IA32 test_thread_flag(TIF_IA32)
|
||||
#else
|
||||
# define IS_IA32 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We must return the syscall number to actually look up in the table.
|
||||
* This can be -1L to skip running any syscall at all.
|
||||
*/
|
||||
asmregparm long syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
long ret = 0;
|
||||
|
||||
/*
|
||||
* If we stepped into a sysenter/syscall insn, it trapped in
|
||||
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
|
||||
* If user-mode had set TF itself, then it's still clear from
|
||||
* do_debug() and we need to set it again to restore the user
|
||||
* state. If we entered on the slow path, TF was already set.
|
||||
*/
|
||||
if (test_thread_flag(TIF_SINGLESTEP))
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
/* do the secure computing check first */
|
||||
secure_computing(regs->orig_ax);
|
||||
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACE)
|
||||
&& (current->ptrace & PT_PTRACED))
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
|
||||
ret = -1L;
|
||||
|
||||
if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
syscall_trace(regs);
|
||||
|
||||
if (unlikely(current->audit_context)) {
|
||||
if (test_thread_flag(TIF_IA32)) {
|
||||
if (IS_IA32)
|
||||
audit_syscall_entry(AUDIT_ARCH_I386,
|
||||
regs->orig_ax,
|
||||
regs->bx, regs->cx,
|
||||
regs->dx, regs->si);
|
||||
} else {
|
||||
#ifdef CONFIG_X86_64
|
||||
else
|
||||
audit_syscall_entry(AUDIT_ARCH_X86_64,
|
||||
regs->orig_ax,
|
||||
regs->di, regs->si,
|
||||
regs->dx, regs->r10);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret ?: regs->orig_ax;
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
|
||||
asmregparm void syscall_trace_leave(struct pt_regs *regs)
|
||||
{
|
||||
if (unlikely(current->audit_context))
|
||||
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
|
||||
|
||||
if ((test_thread_flag(TIF_SYSCALL_TRACE)
|
||||
|| test_thread_flag(TIF_SINGLESTEP))
|
||||
&& (current->ptrace & PT_PTRACED))
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
syscall_trace(regs);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
/*
|
||||
* If TIF_SYSCALL_EMU is set, we only get here because of
|
||||
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
|
||||
* We already reported this syscall instruction in
|
||||
* syscall_trace_enter(), so don't do any more now.
|
||||
*/
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we are single-stepping, synthesize a trap to follow the
|
||||
* system call instruction.
|
||||
*/
|
||||
if (test_thread_flag(TIF_SINGLESTEP) &&
|
||||
(current->ptrace & PT_PTRACED))
|
||||
send_sigtrap(current, regs, 0);
|
||||
}
|
||||
|
|
|
@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
|
|||
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
|
||||
},
|
||||
},
|
||||
{ /* Handle problems with rebooting on Dell T5400's */
|
||||
.callback = set_bios_reboot,
|
||||
.ident = "Dell Precision T5400",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
|
||||
},
|
||||
},
|
||||
{ /* Handle problems with rebooting on HP laptops */
|
||||
.callback = set_bios_reboot,
|
||||
.ident = "HP Compaq Laptop",
|
||||
|
|
|
@ -57,12 +57,8 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/user.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/edd.h>
|
||||
#include <linux/iscsi_ibft.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/ctype.h>
|
||||
|
@ -96,7 +92,7 @@
|
|||
#include <asm/smp.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/proto.h>
|
||||
|
||||
|
@ -104,7 +100,6 @@
|
|||
#include <asm/paravirt.h>
|
||||
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/topology.h>
|
||||
#include <asm/apicdef.h>
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg)
|
|||
early_param("elfcorehdr", setup_elfcorehdr);
|
||||
#endif
|
||||
|
||||
static struct x86_quirks default_x86_quirks __initdata;
|
||||
|
||||
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
||||
|
||||
/*
|
||||
* Determine if we were loaded by an EFI loader. If so, then we have also been
|
||||
* passed the efi memmap, systab, etc., so we should use these data structures
|
||||
|
@ -824,7 +823,10 @@ void __init setup_arch(char **cmdline_p)
|
|||
vmi_init();
|
||||
#endif
|
||||
|
||||
paravirt_pagetable_setup_start(swapper_pg_dir);
|
||||
paging_init();
|
||||
paravirt_pagetable_setup_done(swapper_pg_dir);
|
||||
paravirt_post_allocator_init();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
map_vsyscall();
|
||||
|
@ -854,14 +856,6 @@ void __init setup_arch(char **cmdline_p)
|
|||
init_cpu_to_node();
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
/*
|
||||
* need to check online nodes num, call it
|
||||
* here before time_init/tsc_init
|
||||
*/
|
||||
numaq_tsc_disable();
|
||||
#endif
|
||||
|
||||
init_apic_mappings();
|
||||
ioapic_init_mappings();
|
||||
|
||||
|
|
|
@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
|
|||
|
||||
badframe:
|
||||
if (show_unhandled_signals && printk_ratelimit()) {
|
||||
printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
|
||||
printk("%s%s[%d] bad frame in sigreturn frame:"
|
||||
"%p ip:%lx sp:%lx oeax:%lx",
|
||||
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
|
||||
current->comm, task_pid_nr(current), frame, regs->ip,
|
||||
|
@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs)
|
|||
void
|
||||
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
{
|
||||
/* Pending single-step? */
|
||||
if (thread_info_flags & _TIF_SINGLESTEP) {
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
clear_thread_flag(TIF_SINGLESTEP);
|
||||
}
|
||||
|
||||
/* deal with pending signal delivery */
|
||||
if (thread_info_flags & _TIF_SIGPENDING)
|
||||
do_signal(regs);
|
||||
|
|
|
@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs)
|
|||
void do_notify_resume(struct pt_regs *regs, void *unused,
|
||||
__u32 thread_info_flags)
|
||||
{
|
||||
/* Pending single-step? */
|
||||
if (thread_info_flags & _TIF_SINGLESTEP) {
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
clear_thread_flag(TIF_SINGLESTEP);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
/* notify userspace of pending MCEs */
|
||||
if (thread_info_flags & _TIF_MCE_NOTIFY)
|
||||
|
|
|
@ -546,8 +546,8 @@ static inline void __inquire_remote_apic(int apicid)
|
|||
printk(KERN_CONT
|
||||
"a previous APIC delivery may have failed\n");
|
||||
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
|
||||
apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
|
||||
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
|
||||
apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
|
||||
|
||||
timeout = 0;
|
||||
do {
|
||||
|
@ -579,11 +579,11 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|
|||
int maxlvt;
|
||||
|
||||
/* Target chip */
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
|
||||
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
|
||||
|
||||
/* Boot on the stack */
|
||||
/* Kick the second */
|
||||
apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
|
||||
apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
@ -592,14 +592,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|
|||
* Give the other CPU some time to accept the IPI.
|
||||
*/
|
||||
udelay(200);
|
||||
/*
|
||||
* Due to the Pentium erratum 3AP.
|
||||
*/
|
||||
maxlvt = lapic_get_maxlvt();
|
||||
if (maxlvt > 3) {
|
||||
apic_read_around(APIC_SPIV);
|
||||
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
||||
apic_write(APIC_ESR, 0);
|
||||
}
|
||||
accept_status = (apic_read(APIC_ESR) & 0xEF);
|
||||
Dprintk("NMI sent.\n");
|
||||
|
||||
|
@ -625,12 +620,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
return send_status;
|
||||
}
|
||||
|
||||
maxlvt = lapic_get_maxlvt();
|
||||
|
||||
/*
|
||||
* Be paranoid about clearing APIC errors.
|
||||
*/
|
||||
if (APIC_INTEGRATED(apic_version[phys_apicid])) {
|
||||
apic_read_around(APIC_SPIV);
|
||||
apic_write(APIC_ESR, 0);
|
||||
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
||||
apic_write(APIC_ESR, 0);
|
||||
apic_read(APIC_ESR);
|
||||
}
|
||||
|
||||
|
@ -639,13 +636,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
/*
|
||||
* Turn INIT on target chip
|
||||
*/
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
|
||||
/*
|
||||
* Send IPI
|
||||
*/
|
||||
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
|
||||
| APIC_DM_INIT);
|
||||
apic_write(APIC_ICR,
|
||||
APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
@ -655,10 +652,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
Dprintk("Deasserting INIT.\n");
|
||||
|
||||
/* Target chip */
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
|
||||
/* Send IPI */
|
||||
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
@ -689,12 +686,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
*/
|
||||
Dprintk("#startup loops: %d.\n", num_starts);
|
||||
|
||||
maxlvt = lapic_get_maxlvt();
|
||||
|
||||
for (j = 1; j <= num_starts; j++) {
|
||||
Dprintk("Sending STARTUP #%d.\n", j);
|
||||
apic_read_around(APIC_SPIV);
|
||||
apic_write(APIC_ESR, 0);
|
||||
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
||||
apic_write(APIC_ESR, 0);
|
||||
apic_read(APIC_ESR);
|
||||
Dprintk("After apic_write.\n");
|
||||
|
||||
|
@ -703,12 +698,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
*/
|
||||
|
||||
/* Target chip */
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
|
||||
|
||||
/* Boot on the stack */
|
||||
/* Kick the second */
|
||||
apic_write_around(APIC_ICR, APIC_DM_STARTUP
|
||||
| (start_eip >> 12));
|
||||
apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
|
||||
|
||||
/*
|
||||
* Give the other CPU some time to accept the IPI.
|
||||
|
@ -724,13 +718,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
* Give the other CPU some time to accept the IPI.
|
||||
*/
|
||||
udelay(200);
|
||||
/*
|
||||
* Due to the Pentium erratum 3AP.
|
||||
*/
|
||||
if (maxlvt > 3) {
|
||||
apic_read_around(APIC_SPIV);
|
||||
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
||||
apic_write(APIC_ESR, 0);
|
||||
}
|
||||
accept_status = (apic_read(APIC_ESR) & 0xEF);
|
||||
if (send_status || accept_status)
|
||||
break;
|
||||
|
@ -768,7 +757,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
|
|||
*
|
||||
* Must be called after the _cpu_pda pointer table is initialized.
|
||||
*/
|
||||
static int __cpuinit get_local_pda(int cpu)
|
||||
int __cpuinit get_local_pda(int cpu)
|
||||
{
|
||||
struct x8664_pda *oldpda, *newpda;
|
||||
unsigned long size = sizeof(struct x8664_pda);
|
||||
|
@ -1390,7 +1379,8 @@ static int __init parse_maxcpus(char *arg)
|
|||
{
|
||||
extern unsigned int maxcpus;
|
||||
|
||||
maxcpus = simple_strtoul(arg, NULL, 0);
|
||||
if (arg)
|
||||
maxcpus = simple_strtoul(arg, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
early_param("maxcpus", parse_maxcpus);
|
||||
|
|
|
@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
|
|||
static int enable_single_step(struct task_struct *child)
|
||||
{
|
||||
struct pt_regs *regs = task_pt_regs(child);
|
||||
unsigned long oflags;
|
||||
|
||||
/*
|
||||
* If we stepped into a sysenter/syscall insn, it trapped in
|
||||
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
|
||||
* If user-mode had set TF itself, then it's still clear from
|
||||
* do_debug() and we need to set it again to restore the user
|
||||
* state so we don't wrongly set TIF_FORCED_TF below.
|
||||
* If enable_single_step() was used last and that is what
|
||||
* set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
|
||||
* already set and our bookkeeping is fine.
|
||||
*/
|
||||
if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
/*
|
||||
* Always set TIF_SINGLESTEP - this guarantees that
|
||||
|
@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child)
|
|||
*/
|
||||
set_tsk_thread_flag(child, TIF_SINGLESTEP);
|
||||
|
||||
/*
|
||||
* If TF was already set, don't do anything else
|
||||
*/
|
||||
if (regs->flags & X86_EFLAGS_TF)
|
||||
return 0;
|
||||
oflags = regs->flags;
|
||||
|
||||
/* Set TF on the kernel stack.. */
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child)
|
|||
* ..but if TF is changed by the instruction we will trace,
|
||||
* don't mark it as being "us" that set it, so that we
|
||||
* won't clear it by hand later.
|
||||
*
|
||||
* Note that if we don't actually execute the popf because
|
||||
* of a signal arriving right now or suchlike, we will lose
|
||||
* track of the fact that it really was "us" that set it.
|
||||
*/
|
||||
if (is_setting_trap_flag(child, regs))
|
||||
if (is_setting_trap_flag(child, regs)) {
|
||||
clear_tsk_thread_flag(child, TIF_FORCED_TF);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If TF was already set, check whether it was us who set it.
|
||||
* If not, we should never attempt a block step.
|
||||
*/
|
||||
if (oflags & X86_EFLAGS_TF)
|
||||
return test_tsk_thread_flag(child, TIF_FORCED_TF);
|
||||
|
||||
set_tsk_thread_flag(child, TIF_FORCED_TF);
|
||||
|
||||
|
|
|
@ -129,6 +129,7 @@ void __init hpet_time_init(void)
|
|||
*/
|
||||
void __init time_init(void)
|
||||
{
|
||||
pre_time_init_hook();
|
||||
tsc_init();
|
||||
late_time_init = choose_time_init();
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@
|
|||
#include <asm/nmi.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#include "mach_traps.h"
|
||||
|
||||
|
@ -77,26 +78,6 @@ char ignore_fpu_irq;
|
|||
gate_desc idt_table[256]
|
||||
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
|
||||
|
||||
asmlinkage void divide_error(void);
|
||||
asmlinkage void debug(void);
|
||||
asmlinkage void nmi(void);
|
||||
asmlinkage void int3(void);
|
||||
asmlinkage void overflow(void);
|
||||
asmlinkage void bounds(void);
|
||||
asmlinkage void invalid_op(void);
|
||||
asmlinkage void device_not_available(void);
|
||||
asmlinkage void coprocessor_segment_overrun(void);
|
||||
asmlinkage void invalid_TSS(void);
|
||||
asmlinkage void segment_not_present(void);
|
||||
asmlinkage void stack_segment(void);
|
||||
asmlinkage void general_protection(void);
|
||||
asmlinkage void page_fault(void);
|
||||
asmlinkage void coprocessor_error(void);
|
||||
asmlinkage void simd_coprocessor_error(void);
|
||||
asmlinkage void alignment_check(void);
|
||||
asmlinkage void spurious_interrupt_bug(void);
|
||||
asmlinkage void machine_check(void);
|
||||
|
||||
int panic_on_unrecovered_nmi;
|
||||
int kstack_depth_to_print = 24;
|
||||
static unsigned int code_bytes = 64;
|
||||
|
@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = {
|
|||
|
||||
static void
|
||||
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp, char *log_lvl)
|
||||
unsigned long *stack, unsigned long bp, char *log_lvl)
|
||||
{
|
||||
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
||||
printk("%s =======================\n", log_lvl);
|
||||
|
@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip)
|
|||
return ud2 == 0x0b0f;
|
||||
}
|
||||
|
||||
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
|
||||
static int die_owner = -1;
|
||||
static unsigned int die_nest_count;
|
||||
|
||||
unsigned __kprobes long oops_begin(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
oops_enter();
|
||||
|
||||
if (die_owner != raw_smp_processor_id()) {
|
||||
console_verbose();
|
||||
raw_local_irq_save(flags);
|
||||
__raw_spin_lock(&die_lock);
|
||||
die_owner = smp_processor_id();
|
||||
die_nest_count = 0;
|
||||
bust_spinlocks(1);
|
||||
} else {
|
||||
raw_local_irq_save(flags);
|
||||
}
|
||||
die_nest_count++;
|
||||
return flags;
|
||||
}
|
||||
|
||||
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
||||
{
|
||||
bust_spinlocks(0);
|
||||
die_owner = -1;
|
||||
add_taint(TAINT_DIE);
|
||||
__raw_spin_unlock(&die_lock);
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
if (!regs)
|
||||
return;
|
||||
|
||||
if (kexec_should_crash(current))
|
||||
crash_kexec(regs);
|
||||
|
||||
if (in_interrupt())
|
||||
panic("Fatal exception in interrupt");
|
||||
|
||||
if (panic_on_oops)
|
||||
panic("Fatal exception");
|
||||
|
||||
oops_exit();
|
||||
do_exit(signr);
|
||||
}
|
||||
|
||||
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
||||
{
|
||||
unsigned short ss;
|
||||
|
@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
|||
*/
|
||||
void die(const char *str, struct pt_regs *regs, long err)
|
||||
{
|
||||
static struct {
|
||||
raw_spinlock_t lock;
|
||||
u32 lock_owner;
|
||||
int lock_owner_depth;
|
||||
} die = {
|
||||
.lock = __RAW_SPIN_LOCK_UNLOCKED,
|
||||
.lock_owner = -1,
|
||||
.lock_owner_depth = 0
|
||||
};
|
||||
unsigned long flags;
|
||||
unsigned long flags = oops_begin();
|
||||
|
||||
oops_enter();
|
||||
|
||||
if (die.lock_owner != raw_smp_processor_id()) {
|
||||
console_verbose();
|
||||
raw_local_irq_save(flags);
|
||||
__raw_spin_lock(&die.lock);
|
||||
die.lock_owner = smp_processor_id();
|
||||
die.lock_owner_depth = 0;
|
||||
bust_spinlocks(1);
|
||||
} else {
|
||||
raw_local_irq_save(flags);
|
||||
}
|
||||
|
||||
if (++die.lock_owner_depth < 3) {
|
||||
if (die_nest_count < 3) {
|
||||
report_bug(regs->ip, regs);
|
||||
|
||||
if (__die(str, regs, err))
|
||||
|
@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err)
|
|||
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
|
||||
}
|
||||
|
||||
bust_spinlocks(0);
|
||||
die.lock_owner = -1;
|
||||
add_taint(TAINT_DIE);
|
||||
__raw_spin_unlock(&die.lock);
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
if (!regs)
|
||||
return;
|
||||
|
||||
if (kexec_should_crash(current))
|
||||
crash_kexec(regs);
|
||||
|
||||
if (in_interrupt())
|
||||
panic("Fatal exception in interrupt");
|
||||
|
||||
if (panic_on_oops)
|
||||
panic("Fatal exception");
|
||||
|
||||
oops_exit();
|
||||
do_exit(SIGSEGV);
|
||||
oops_end(flags, regs, SIGSEGV);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
@ -51,30 +51,10 @@
|
|||
#include <asm/pgalloc.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/pda.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#include <mach_traps.h>
|
||||
|
||||
asmlinkage void divide_error(void);
|
||||
asmlinkage void debug(void);
|
||||
asmlinkage void nmi(void);
|
||||
asmlinkage void int3(void);
|
||||
asmlinkage void overflow(void);
|
||||
asmlinkage void bounds(void);
|
||||
asmlinkage void invalid_op(void);
|
||||
asmlinkage void device_not_available(void);
|
||||
asmlinkage void double_fault(void);
|
||||
asmlinkage void coprocessor_segment_overrun(void);
|
||||
asmlinkage void invalid_TSS(void);
|
||||
asmlinkage void segment_not_present(void);
|
||||
asmlinkage void stack_segment(void);
|
||||
asmlinkage void general_protection(void);
|
||||
asmlinkage void page_fault(void);
|
||||
asmlinkage void coprocessor_error(void);
|
||||
asmlinkage void simd_coprocessor_error(void);
|
||||
asmlinkage void alignment_check(void);
|
||||
asmlinkage void spurious_interrupt_bug(void);
|
||||
asmlinkage void machine_check(void);
|
||||
|
||||
int panic_on_unrecovered_nmi;
|
||||
int kstack_depth_to_print = 12;
|
||||
static unsigned int code_bytes = 64;
|
||||
|
@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = {
|
|||
.address = print_trace_address,
|
||||
};
|
||||
|
||||
void show_trace(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp)
|
||||
static void
|
||||
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp, char *log_lvl)
|
||||
{
|
||||
printk("\nCall Trace:\n");
|
||||
dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
|
||||
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
void show_trace(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp)
|
||||
{
|
||||
show_trace_log_lvl(task, regs, stack, bp, "");
|
||||
}
|
||||
|
||||
static void
|
||||
_show_stack(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *sp, unsigned long bp)
|
||||
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *sp, unsigned long bp, char *log_lvl)
|
||||
{
|
||||
unsigned long *stack;
|
||||
int i;
|
||||
|
@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs,
|
|||
printk(" %016lx", *stack++);
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
show_trace(task, regs, sp, bp);
|
||||
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
|
||||
}
|
||||
|
||||
void show_stack(struct task_struct *task, unsigned long *sp)
|
||||
{
|
||||
_show_stack(task, NULL, sp, 0);
|
||||
show_stack_log_lvl(task, NULL, sp, 0, "");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs)
|
|||
u8 *ip;
|
||||
|
||||
printk("Stack: ");
|
||||
_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
|
||||
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
|
||||
regs->bp, "");
|
||||
printk("\n");
|
||||
|
||||
printk(KERN_EMERG "Code: ");
|
||||
|
@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void)
|
|||
}
|
||||
|
||||
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
||||
{
|
||||
{
|
||||
die_owner = -1;
|
||||
bust_spinlocks(0);
|
||||
die_nest_count--;
|
||||
|
|
|
@ -73,7 +73,7 @@ int is_visws_box(void)
|
|||
return visws_board_type >= 0;
|
||||
}
|
||||
|
||||
static int __init visws_time_init_quirk(void)
|
||||
static int __init visws_time_init(void)
|
||||
{
|
||||
printk(KERN_INFO "Starting Cobalt Timer system clock\n");
|
||||
|
||||
|
@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __init visws_pre_intr_init_quirk(void)
|
||||
static int __init visws_pre_intr_init(void)
|
||||
{
|
||||
init_VISWS_APIC_irqs();
|
||||
|
||||
|
@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size);
|
|||
|
||||
long long mem_size __initdata = 0;
|
||||
|
||||
static char * __init visws_memory_setup_quirk(void)
|
||||
static char * __init visws_memory_setup(void)
|
||||
{
|
||||
long long gfx_mem_size = 8 * MB;
|
||||
|
||||
|
@ -176,7 +176,7 @@ static void visws_machine_power_off(void)
|
|||
outl(PIIX_SPECIAL_STOP, 0xCFC);
|
||||
}
|
||||
|
||||
static int __init visws_get_smp_config_quirk(unsigned int early)
|
||||
static int __init visws_get_smp_config(unsigned int early)
|
||||
{
|
||||
/*
|
||||
* Prevent MP-table parsing by the generic code:
|
||||
|
@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus;
|
|||
* No problem for Linux.
|
||||
*/
|
||||
|
||||
static void __init MP_processor_info (struct mpc_config_processor *m)
|
||||
static void __init MP_processor_info(struct mpc_config_processor *m)
|
||||
{
|
||||
int ver, logical_apicid;
|
||||
physid_mask_t apic_cpus;
|
||||
|
@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
|
|||
apic_version[m->mpc_apicid] = ver;
|
||||
}
|
||||
|
||||
int __init visws_find_smp_config_quirk(unsigned int reserve)
|
||||
static int __init visws_find_smp_config(unsigned int reserve)
|
||||
{
|
||||
struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
|
||||
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
|
||||
|
@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve)
|
|||
return 1;
|
||||
}
|
||||
|
||||
extern int visws_trap_init_quirk(void);
|
||||
static int visws_trap_init(void);
|
||||
|
||||
static struct x86_quirks visws_x86_quirks __initdata = {
|
||||
.arch_time_init = visws_time_init,
|
||||
.arch_pre_intr_init = visws_pre_intr_init,
|
||||
.arch_memory_setup = visws_memory_setup,
|
||||
.arch_intr_init = NULL,
|
||||
.arch_trap_init = visws_trap_init,
|
||||
.mach_get_smp_config = visws_get_smp_config,
|
||||
.mach_find_smp_config = visws_find_smp_config,
|
||||
};
|
||||
|
||||
void __init visws_early_detect(void)
|
||||
{
|
||||
|
@ -272,16 +282,10 @@ void __init visws_early_detect(void)
|
|||
|
||||
/*
|
||||
* Install special quirks for timer, interrupt and memory setup:
|
||||
*/
|
||||
arch_time_init_quirk = visws_time_init_quirk;
|
||||
arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
|
||||
arch_memory_setup_quirk = visws_memory_setup_quirk;
|
||||
|
||||
/*
|
||||
* Fall back to generic behavior for traps:
|
||||
* Override generic MP-table parsing:
|
||||
*/
|
||||
arch_intr_init_quirk = NULL;
|
||||
arch_trap_init_quirk = visws_trap_init_quirk;
|
||||
x86_quirks = &visws_x86_quirks;
|
||||
|
||||
/*
|
||||
* Install reboot quirks:
|
||||
|
@ -294,12 +298,6 @@ void __init visws_early_detect(void)
|
|||
*/
|
||||
no_broadcast = 0;
|
||||
|
||||
/*
|
||||
* Override generic MP-table parsing:
|
||||
*/
|
||||
mach_get_smp_config_quirk = visws_get_smp_config_quirk;
|
||||
mach_find_smp_config_quirk = visws_find_smp_config_quirk;
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
/*
|
||||
* Turn off IO-APIC detection and initialization:
|
||||
|
@ -426,7 +424,7 @@ static __init void cobalt_init(void)
|
|||
co_apic_read(CO_APIC_ID));
|
||||
}
|
||||
|
||||
int __init visws_trap_init_quirk(void)
|
||||
static int __init visws_trap_init(void)
|
||||
{
|
||||
lithium_init();
|
||||
cobalt_init();
|
||||
|
|
|
@ -906,7 +906,6 @@ static inline int __init activate_vmi(void)
|
|||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
para_fill(pv_apic_ops.apic_read, APICRead);
|
||||
para_fill(pv_apic_ops.apic_write, APICWrite);
|
||||
para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
|
@ -991,7 +991,6 @@ __init void lguest_init(void)
|
|||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
/* apic read/write intercepts */
|
||||
pv_apic_ops.apic_write = lguest_apic_write;
|
||||
pv_apic_ops.apic_write_atomic = lguest_apic_write;
|
||||
pv_apic_ops.apic_read = lguest_apic_read;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -10,14 +10,6 @@
|
|||
#include <asm/e820.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
/*
|
||||
* Any quirks to be performed to initialize timers/irqs/etc?
|
||||
*/
|
||||
int (*arch_time_init_quirk)(void);
|
||||
int (*arch_pre_intr_init_quirk)(void);
|
||||
int (*arch_intr_init_quirk)(void);
|
||||
int (*arch_trap_init_quirk)(void);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
#define DEFAULT_SEND_IPI (1)
|
||||
#else
|
||||
|
@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI;
|
|||
**/
|
||||
void __init pre_intr_init_hook(void)
|
||||
{
|
||||
if (arch_pre_intr_init_quirk) {
|
||||
if (arch_pre_intr_init_quirk())
|
||||
if (x86_quirks->arch_pre_intr_init) {
|
||||
if (x86_quirks->arch_pre_intr_init())
|
||||
return;
|
||||
}
|
||||
init_ISA_irqs();
|
||||
|
@ -64,8 +56,8 @@ static struct irqaction irq2 = {
|
|||
**/
|
||||
void __init intr_init_hook(void)
|
||||
{
|
||||
if (arch_intr_init_quirk) {
|
||||
if (arch_intr_init_quirk())
|
||||
if (x86_quirks->arch_intr_init) {
|
||||
if (x86_quirks->arch_intr_init())
|
||||
return;
|
||||
}
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
|
@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void)
|
|||
**/
|
||||
void __init trap_init_hook(void)
|
||||
{
|
||||
if (arch_trap_init_quirk) {
|
||||
if (arch_trap_init_quirk())
|
||||
if (x86_quirks->arch_trap_init) {
|
||||
if (x86_quirks->arch_trap_init())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -110,6 +102,16 @@ static struct irqaction irq0 = {
|
|||
.name = "timer"
|
||||
};
|
||||
|
||||
/**
|
||||
* pre_time_init_hook - do any specific initialisations before.
|
||||
*
|
||||
**/
|
||||
void __init pre_time_init_hook(void)
|
||||
{
|
||||
if (x86_quirks->arch_pre_time_init)
|
||||
x86_quirks->arch_pre_time_init();
|
||||
}
|
||||
|
||||
/**
|
||||
* time_init_hook - do any specific initialisations for the system timer.
|
||||
*
|
||||
|
@ -119,13 +121,13 @@ static struct irqaction irq0 = {
|
|||
**/
|
||||
void __init time_init_hook(void)
|
||||
{
|
||||
if (arch_time_init_quirk) {
|
||||
if (x86_quirks->arch_time_init) {
|
||||
/*
|
||||
* A nonzero return code does not mean failure, it means
|
||||
* that the architecture quirk does not want any
|
||||
* generic (timer) setup to be performed after this:
|
||||
*/
|
||||
if (arch_time_init_quirk())
|
||||
if (x86_quirks->arch_time_init())
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o
|
|||
endif
|
||||
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
|
||||
|
||||
obj-$(CONFIG_MEMTEST) += memtest.o
|
||||
|
|
|
@ -844,6 +844,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|||
reserve_early(table_start << PAGE_SHIFT,
|
||||
table_end << PAGE_SHIFT, "PGTABLE");
|
||||
|
||||
if (!after_init_bootmem)
|
||||
early_memtest(start, end);
|
||||
|
||||
return end >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
|
@ -868,8 +871,6 @@ void __init paging_init(void)
|
|||
*/
|
||||
sparse_init();
|
||||
zone_sizes_init();
|
||||
|
||||
paravirt_post_allocator_init();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -517,118 +517,6 @@ static void __init init_gbpages(void)
|
|||
direct_gbpages = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMTEST
|
||||
|
||||
static void __init memtest(unsigned long start_phys, unsigned long size,
|
||||
unsigned pattern)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned long *start;
|
||||
unsigned long start_bad;
|
||||
unsigned long last_bad;
|
||||
unsigned long val;
|
||||
unsigned long start_phys_aligned;
|
||||
unsigned long count;
|
||||
unsigned long incr;
|
||||
|
||||
switch (pattern) {
|
||||
case 0:
|
||||
val = 0UL;
|
||||
break;
|
||||
case 1:
|
||||
val = -1UL;
|
||||
break;
|
||||
case 2:
|
||||
val = 0x5555555555555555UL;
|
||||
break;
|
||||
case 3:
|
||||
val = 0xaaaaaaaaaaaaaaaaUL;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
incr = sizeof(unsigned long);
|
||||
start_phys_aligned = ALIGN(start_phys, incr);
|
||||
count = (size - (start_phys_aligned - start_phys))/incr;
|
||||
start = __va(start_phys_aligned);
|
||||
start_bad = 0;
|
||||
last_bad = 0;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
start[i] = val;
|
||||
for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
|
||||
if (*start != val) {
|
||||
if (start_phys_aligned == last_bad + incr) {
|
||||
last_bad += incr;
|
||||
} else {
|
||||
if (start_bad) {
|
||||
printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
|
||||
val, start_bad, last_bad + incr);
|
||||
reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
||||
}
|
||||
start_bad = last_bad = start_phys_aligned;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (start_bad) {
|
||||
printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
|
||||
val, start_bad, last_bad + incr);
|
||||
reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* default is disabled */
|
||||
static int memtest_pattern __initdata;
|
||||
|
||||
static int __init parse_memtest(char *arg)
|
||||
{
|
||||
if (arg)
|
||||
memtest_pattern = simple_strtoul(arg, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("memtest", parse_memtest);
|
||||
|
||||
static void __init early_memtest(unsigned long start, unsigned long end)
|
||||
{
|
||||
u64 t_start, t_size;
|
||||
unsigned pattern;
|
||||
|
||||
if (!memtest_pattern)
|
||||
return;
|
||||
|
||||
printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
|
||||
for (pattern = 0; pattern < memtest_pattern; pattern++) {
|
||||
t_start = start;
|
||||
t_size = 0;
|
||||
while (t_start < end) {
|
||||
t_start = find_e820_area_size(t_start, &t_size, 1);
|
||||
|
||||
/* done ? */
|
||||
if (t_start >= end)
|
||||
break;
|
||||
if (t_start + t_size > end)
|
||||
t_size = end - t_start;
|
||||
|
||||
printk(KERN_CONT "\n %016llx - %016llx pattern %d",
|
||||
(unsigned long long)t_start,
|
||||
(unsigned long long)t_start + t_size, pattern);
|
||||
|
||||
memtest(t_start, t_size, pattern);
|
||||
|
||||
t_start += t_size;
|
||||
}
|
||||
}
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
#else
|
||||
static void __init early_memtest(unsigned long start, unsigned long end)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long __init kernel_physical_mapping_init(unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long page_size_mask)
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pfn.h>
|
||||
|
||||
#include <asm/e820.h>
|
||||
|
||||
static void __init memtest(unsigned long start_phys, unsigned long size,
|
||||
unsigned pattern)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned long *start;
|
||||
unsigned long start_bad;
|
||||
unsigned long last_bad;
|
||||
unsigned long val;
|
||||
unsigned long start_phys_aligned;
|
||||
unsigned long count;
|
||||
unsigned long incr;
|
||||
|
||||
switch (pattern) {
|
||||
case 0:
|
||||
val = 0UL;
|
||||
break;
|
||||
case 1:
|
||||
val = -1UL;
|
||||
break;
|
||||
case 2:
|
||||
#ifdef CONFIG_X86_64
|
||||
val = 0x5555555555555555UL;
|
||||
#else
|
||||
val = 0x55555555UL;
|
||||
#endif
|
||||
break;
|
||||
case 3:
|
||||
#ifdef CONFIG_X86_64
|
||||
val = 0xaaaaaaaaaaaaaaaaUL;
|
||||
#else
|
||||
val = 0xaaaaaaaaUL;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
incr = sizeof(unsigned long);
|
||||
start_phys_aligned = ALIGN(start_phys, incr);
|
||||
count = (size - (start_phys_aligned - start_phys))/incr;
|
||||
start = __va(start_phys_aligned);
|
||||
start_bad = 0;
|
||||
last_bad = 0;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
start[i] = val;
|
||||
for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
|
||||
if (*start != val) {
|
||||
if (start_phys_aligned == last_bad + incr) {
|
||||
last_bad += incr;
|
||||
} else {
|
||||
if (start_bad) {
|
||||
printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved",
|
||||
val, start_bad, last_bad + incr);
|
||||
reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
||||
}
|
||||
start_bad = last_bad = start_phys_aligned;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (start_bad) {
|
||||
printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
|
||||
val, start_bad, last_bad + incr);
|
||||
reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
||||
}
|
||||
|
||||
}
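A rough user-space sketch of the same fill-then-verify pass may help when reading the function above; everything below is illustrative only (no kernel interfaces), and failing words are coalesced into ranges the same way the start_bad/last_bad bookkeeping does it:

#include <stdio.h>
#include <stdlib.h>

/* Print one contiguous run of failing words, mirroring the "bad mem addr" message. */
static void report_bad(unsigned long val, size_t first, size_t last)
{
	printf("pattern %016lx: bad words %zu - %zu\n", val, first, last + 1);
}

/* Fill count words with val, then verify and coalesce consecutive failures. */
static void memtest_pass(unsigned long *words, size_t count, unsigned long val)
{
	size_t i, start_bad = 0, last_bad = 0;
	int in_bad = 0;

	for (i = 0; i < count; i++)
		words[i] = val;

	for (i = 0; i < count; i++) {
		if (words[i] == val)
			continue;
		if (in_bad && i == last_bad + 1) {
			last_bad = i;			/* extend the current bad range */
		} else {
			if (in_bad)
				report_bad(val, start_bad, last_bad);
			start_bad = last_bad = i;	/* start a new bad range */
			in_bad = 1;
		}
	}
	if (in_bad)
		report_bad(val, start_bad, last_bad);
}

int main(void)
{
	size_t count = 1UL << 16;
	unsigned long *buf = malloc(count * sizeof(*buf));
	/* same four patterns as above; assumes a 64-bit unsigned long */
	unsigned long patterns[] = { 0UL, ~0UL, 0x5555555555555555UL, 0xaaaaaaaaaaaaaaaaUL };
	size_t p;

	if (!buf)
		return 1;
	for (p = 0; p < sizeof(patterns) / sizeof(patterns[0]); p++)
		memtest_pass(buf, count, patterns[p]);
	free(buf);
	return 0;
}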
/* default is disabled */
|
||||
static int memtest_pattern __initdata;
|
||||
|
||||
static int __init parse_memtest(char *arg)
|
||||
{
|
||||
if (arg)
|
||||
memtest_pattern = simple_strtoul(arg, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("memtest", parse_memtest);
|
||||
|
||||
void __init early_memtest(unsigned long start, unsigned long end)
|
||||
{
|
||||
u64 t_start, t_size;
|
||||
unsigned pattern;
|
||||
|
||||
if (!memtest_pattern)
|
||||
return;
|
||||
|
||||
printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
|
||||
for (pattern = 0; pattern < memtest_pattern; pattern++) {
|
||||
t_start = start;
|
||||
t_size = 0;
|
||||
while (t_start < end) {
|
||||
t_start = find_e820_area_size(t_start, &t_size, 1);
|
||||
|
||||
/* done ? */
|
||||
if (t_start >= end)
|
||||
break;
|
||||
if (t_start + t_size > end)
|
||||
t_size = end - t_start;
|
||||
|
||||
printk(KERN_CONT "\n %010llx - %010llx pattern %d",
|
||||
(unsigned long long)t_start,
|
||||
(unsigned long long)t_start + t_size, pattern);
|
||||
|
||||
memtest(t_start, t_size, pattern);
|
||||
|
||||
t_start += t_size;
|
||||
}
|
||||
}
|
||||
printk(KERN_CONT "\n");
|
||||
}
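Read together with parse_memtest() above, the command-line value is an exclusive upper bound on the pattern index: booting with memtest=4 runs all four patterns (0-3) over every free e820 range found before bootmem takes over, while the default of 0 leaves the scan disabled. This reading is inferred from the loop here rather than from any separate documentation.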
@ -12,6 +12,8 @@
|
|||
#include <linux/gfp.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include <asm/msr.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
@ -489,3 +491,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
|
|||
|
||||
free_memtype(addr, addr + size);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/* get Nth element of the linked list */
|
||||
static struct memtype *memtype_get_idx(loff_t pos)
|
||||
{
|
||||
struct memtype *list_node, *print_entry;
|
||||
int i = 1;
|
||||
|
||||
print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
|
||||
if (!print_entry)
|
||||
return NULL;
|
||||
|
||||
spin_lock(&memtype_lock);
|
||||
list_for_each_entry(list_node, &memtype_list, nd) {
|
||||
if (pos == i) {
|
||||
*print_entry = *list_node;
|
||||
spin_unlock(&memtype_lock);
|
||||
return print_entry;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
spin_unlock(&memtype_lock);
|
||||
kfree(print_entry);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
{
|
||||
if (*pos == 0) {
|
||||
++*pos;
|
||||
seq_printf(seq, "PAT memtype list:\n");
|
||||
}
|
||||
|
||||
return memtype_get_idx(*pos);
|
||||
}
|
||||
|
||||
static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
{
|
||||
++*pos;
|
||||
return memtype_get_idx(*pos);
|
||||
}
|
||||
|
||||
static void memtype_seq_stop(struct seq_file *seq, void *v)
|
||||
{
|
||||
}
|
||||
|
||||
static int memtype_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct memtype *print_entry = (struct memtype *)v;
|
||||
|
||||
seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
|
||||
print_entry->start, print_entry->end);
|
||||
kfree(print_entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct seq_operations memtype_seq_ops = {
|
||||
.start = memtype_seq_start,
|
||||
.next = memtype_seq_next,
|
||||
.stop = memtype_seq_stop,
|
||||
.show = memtype_seq_show,
|
||||
};
|
||||
|
||||
static int memtype_seq_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open(file, &memtype_seq_ops);
|
||||
}
|
||||
|
||||
static const struct file_operations memtype_fops = {
|
||||
.open = memtype_seq_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int __init pat_memtype_list_init(void)
|
||||
{
|
||||
debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
|
||||
NULL, &memtype_fops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(pat_memtype_list_init);
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
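One detail worth noting in the debugfs reader above: memtype_get_idx() copies the Nth list node into a freshly allocated buffer while holding memtype_lock, so the seq_file ->show callback can format and free it without the lock held. A stand-alone user-space sketch of that snapshot-under-lock idiom (types and names here are invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
	struct node *next;
	char payload[64];
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

/* Copy the pos'th element (1-based) under the lock; caller prints and frees it. */
static struct node *snapshot_nth(long pos)
{
	struct node *n, *copy = malloc(sizeof(*copy));
	long i = 1;

	if (!copy)
		return NULL;

	pthread_mutex_lock(&list_lock);
	for (n = list_head; n; n = n->next, i++) {
		if (i == pos) {
			memcpy(copy, n, sizeof(*copy));
			copy->next = NULL;	/* the copy is detached from the live list */
			pthread_mutex_unlock(&list_lock);
			return copy;
		}
	}
	pthread_mutex_unlock(&list_lock);

	free(copy);
	return NULL;
}

int main(void)
{
	struct node a = { NULL, "second entry" }, b = { &a, "first entry" };
	struct node *snap;

	list_head = &b;
	snap = snapshot_nth(2);
	if (snap) {
		puts(snap->payload);
		free(snap);
	}
	return 0;
}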
@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
|
|||
obj-$(CONFIG_PCI_DIRECT) += direct.o
|
||||
obj-$(CONFIG_PCI_OLPC) += olpc.o
|
||||
|
||||
pci-y := fixup.o
|
||||
pci-$(CONFIG_ACPI) += acpi.o
|
||||
pci-y += legacy.o irq.o
|
||||
obj-y += fixup.o
|
||||
obj-$(CONFIG_ACPI) += acpi.o
|
||||
obj-y += legacy.o irq.o
|
||||
|
||||
pci-$(CONFIG_X86_VISWS) += visws.o
|
||||
obj-$(CONFIG_X86_VISWS) += visws.o
|
||||
|
||||
pci-$(CONFIG_X86_NUMAQ) += numa.o
|
||||
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
|
||||
|
||||
obj-y += $(pci-y) common.o early.o
|
||||
obj-y += common.o early.o
|
||||
obj-y += amd_bus.o
|
||||
|
|
|
@ -57,14 +57,17 @@ static int __init pci_legacy_init(void)
|
|||
|
||||
int __init pci_subsys_init(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
pci_numaq_init();
|
||||
#endif
|
||||
#ifdef CONFIG_ACPI
|
||||
pci_acpi_init();
|
||||
#endif
|
||||
#ifdef CONFIG_X86_VISWS
|
||||
pci_visws_init();
|
||||
#endif
|
||||
pci_legacy_init();
|
||||
pcibios_irq_init();
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
pci_numa_init();
|
||||
#endif
|
||||
pcibios_init();
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* numa.c - Low-level PCI access for NUMA-Q machines
|
||||
* numaq_32.c - Low-level PCI access for NUMA-Q machines
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
|
|||
}
|
||||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
|
||||
|
||||
int __init pci_numa_init(void)
|
||||
int __init pci_numaq_init(void)
|
||||
{
|
||||
int quad;
|
||||
|
|
@ -108,7 +108,8 @@ extern void __init dmi_check_skip_isa_align(void);
|
|||
/* some common used subsys_initcalls */
|
||||
extern int __init pci_acpi_init(void);
|
||||
extern int __init pcibios_irq_init(void);
|
||||
extern int __init pci_numa_init(void);
|
||||
extern int __init pci_visws_init(void);
|
||||
extern int __init pci_numaq_init(void);
|
||||
extern int __init pcibios_init(void);
|
||||
|
||||
/* pci-mmconfig.c */
|
||||
|
|
|
@ -86,8 +86,14 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
|
|||
pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
|
||||
}
|
||||
|
||||
static int __init pci_visws_init(void)
|
||||
int __init pci_visws_init(void)
|
||||
{
|
||||
if (!is_visws_box())
|
||||
return -1;
|
||||
|
||||
pcibios_enable_irq = &pci_visws_enable_irq;
|
||||
pcibios_disable_irq = &pci_visws_disable_irq;
|
||||
|
||||
/* The VISWS supports configuration access type 1 only */
|
||||
pci_probe = (pci_probe | PCI_PROBE_CONF1) &
|
||||
~(PCI_PROBE_BIOS | PCI_PROBE_CONF2);
|
||||
|
@ -105,18 +111,3 @@ static int __init pci_visws_init(void)
|
|||
pcibios_resource_survey();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init int pci_subsys_init(void)
|
||||
{
|
||||
if (!is_visws_box())
|
||||
return -1;
|
||||
|
||||
pcibios_enable_irq = &pci_visws_enable_irq;
|
||||
pcibios_disable_irq = &pci_visws_disable_irq;
|
||||
|
||||
pci_visws_init();
|
||||
pcibios_init();
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(pci_subsys_init);
|
||||
|
|
|
@ -62,7 +62,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
|
|||
# Build multiple 32-bit vDSO images to choose from at boot time.
|
||||
#
|
||||
obj-$(VDSO32-y) += vdso32-syms.lds
|
||||
vdso32.so-$(CONFIG_X86_32) += int80
|
||||
vdso32.so-$(VDSO32-y) += int80
|
||||
vdso32.so-$(CONFIG_COMPAT) += syscall
|
||||
vdso32.so-$(VDSO32-y) += sysenter
|
||||
|
||||
|
|
|
@ -193,17 +193,12 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* These symbols are defined by vdso32.S to mark the bounds
|
||||
* of the ELF DSO images included therein.
|
||||
*/
|
||||
extern const char vdso32_default_start, vdso32_default_end;
|
||||
extern const char vdso32_sysenter_start, vdso32_sysenter_end;
|
||||
static struct page *vdso32_pages[1];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
|
||||
#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
|
||||
|
||||
/* May not be __init: called during resume */
|
||||
void syscall32_cpu_init(void)
|
||||
|
@ -226,6 +221,7 @@ static inline void map_compat_vdso(int map)
|
|||
#else /* CONFIG_X86_32 */
|
||||
|
||||
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
|
||||
#define vdso32_syscall() (0)
|
||||
|
||||
void enable_sep_cpu(void)
|
||||
{
|
||||
|
@ -296,12 +292,15 @@ int __init sysenter_setup(void)
|
|||
gate_vma_init();
|
||||
#endif
|
||||
|
||||
if (!vdso32_sysenter()) {
|
||||
vsyscall = &vdso32_default_start;
|
||||
vsyscall_len = &vdso32_default_end - &vdso32_default_start;
|
||||
} else {
|
||||
if (vdso32_syscall()) {
|
||||
vsyscall = &vdso32_syscall_start;
|
||||
vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
|
||||
} else if (vdso32_sysenter()){
|
||||
vsyscall = &vdso32_sysenter_start;
|
||||
vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
|
||||
} else {
|
||||
vsyscall = &vdso32_int80_start;
|
||||
vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
|
||||
}
memcpy(syscall_page, vsyscall, vsyscall_len);
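The selection order here is: prefer the SYSCALL-based image when the CPU can use it from compat mode, then the SYSENTER image, and fall back to the int80 image otherwise; on 32-bit kernels vdso32_syscall() is hard-wired to 0 (see the #define a few hunks above), so the choice degenerates to the old sysenter-or-int80 test.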
|
@ -2,14 +2,17 @@
|
|||
|
||||
__INITDATA
|
||||
|
||||
.globl vdso32_default_start, vdso32_default_end
|
||||
vdso32_default_start:
|
||||
#ifdef CONFIG_X86_32
|
||||
.globl vdso32_int80_start, vdso32_int80_end
|
||||
vdso32_int80_start:
|
||||
.incbin "arch/x86/vdso/vdso32-int80.so"
|
||||
#else
|
||||
vdso32_int80_end:
|
||||
|
||||
.globl vdso32_syscall_start, vdso32_syscall_end
|
||||
vdso32_syscall_start:
|
||||
#ifdef CONFIG_COMPAT
|
||||
.incbin "arch/x86/vdso/vdso32-syscall.so"
|
||||
#endif
|
||||
vdso32_default_end:
|
||||
vdso32_syscall_end:
|
||||
|
||||
.globl vdso32_sysenter_start, vdso32_sysenter_end
|
||||
vdso32_sysenter_start:
|
||||
|
|
|
@ -21,7 +21,8 @@ unsigned int __read_mostly vdso_enabled = 1;
|
|||
extern char vdso_start[], vdso_end[];
|
||||
extern unsigned short vdso_sync_cpuid;
|
||||
|
||||
struct page **vdso_pages;
|
||||
static struct page **vdso_pages;
|
||||
static unsigned vdso_size;
|
||||
|
||||
static inline void *var_ref(void *p, char *name)
|
||||
{
|
||||
|
@ -38,6 +39,7 @@ static int __init init_vdso_vars(void)
|
|||
int i;
|
||||
char *vbase;
|
||||
|
||||
vdso_size = npages << PAGE_SHIFT;
|
||||
vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
|
||||
if (!vdso_pages)
|
||||
goto oom;
|
||||
|
@ -101,20 +103,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
|
|||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr;
|
||||
int ret;
|
||||
unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
|
||||
|
||||
if (!vdso_enabled)
|
||||
return 0;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
addr = vdso_addr(mm->start_stack, len);
|
||||
addr = get_unmapped_area(NULL, addr, len, 0, 0);
|
||||
addr = vdso_addr(mm->start_stack, vdso_size);
|
||||
addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
ret = install_special_mapping(mm, addr, len,
|
||||
ret = install_special_mapping(mm, addr, vdso_size,
|
||||
VM_READ|VM_EXEC|
|
||||
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
|
||||
VM_ALWAYSDUMP,
|
||||
|
|
|
@ -6,8 +6,8 @@ config XEN
|
|||
bool "Xen guest support"
|
||||
select PARAVIRT
|
||||
select PARAVIRT_CLOCK
|
||||
depends on X86_32
|
||||
depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
|
||||
depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER))
|
||||
depends on X86_CMPXCHG && X86_TSC
|
||||
help
|
||||
This is the Linux Xen port. Enabling this will allow the
|
||||
kernel to boot in a paravirtualized environment under the
|
||||
|
@ -15,10 +15,16 @@ config XEN
|
|||
|
||||
config XEN_MAX_DOMAIN_MEMORY
|
||||
int "Maximum allowed size of a domain in gigabytes"
|
||||
default 8
|
||||
default 8 if X86_32
|
||||
default 32 if X86_64
|
||||
depends on XEN
|
||||
help
|
||||
The pseudo-physical to machine address array is sized
|
||||
according to the maximum possible memory size of a Xen
|
||||
domain. This array uses 1 page per gigabyte, so there's no
|
||||
need to be too stingy here.
|
||||
need to be too stingy here.
|
||||
|
||||
config XEN_SAVE_RESTORE
|
||||
bool
|
||||
depends on PM
|
||||
default y
|
|
@ -1,4 +1,4 @@
|
|||
obj-y := enlighten.o setup.o multicalls.o mmu.o \
|
||||
time.o xen-asm.o grant-table.o suspend.o
|
||||
time.o xen-asm_$(BITS).o grant-table.o suspend.o
|
||||
|
||||
obj-$(CONFIG_SMP) += smp.o
|
File diff suppressed because it is too large
@ -44,8 +44,10 @@
|
|||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/linkage.h>
|
||||
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
@ -56,26 +58,29 @@
|
|||
#include "multicalls.h"
|
||||
#include "mmu.h"
|
||||
|
||||
/*
|
||||
* Just beyond the highest usermode address. STACK_TOP_MAX has a
|
||||
* redzone above it, so round it up to a PGD boundary.
|
||||
*/
|
||||
#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
|
||||
|
||||
|
||||
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
|
||||
#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
|
||||
|
||||
/* Placeholder for holes in the address space */
|
||||
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
|
||||
__attribute__((section(".data.page_aligned"))) =
|
||||
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
|
||||
{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
|
||||
|
||||
/* Array of pointers to pages containing p2m entries */
|
||||
static unsigned long *p2m_top[TOP_ENTRIES]
|
||||
__attribute__((section(".data.page_aligned"))) =
|
||||
static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
|
||||
{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
|
||||
|
||||
/* Arrays of p2m arrays expressed in mfns used for save/restore */
|
||||
static unsigned long p2m_top_mfn[TOP_ENTRIES]
|
||||
__attribute__((section(".bss.page_aligned")));
|
||||
static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
|
||||
|
||||
static unsigned long p2m_top_mfn_list[
|
||||
PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
|
||||
__attribute__((section(".bss.page_aligned")));
|
||||
static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
|
||||
__page_aligned_bss;
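The arrays above form a two-level pfn-to-mfn map in which every unpopulated top-level slot points at the single shared p2m_missing page, so holes cost one page in total. A user-space sketch of that shape (sizes and names invented for illustration):

#include <stdio.h>
#include <stdlib.h>

#define ENTRIES_PER_LEAF 1024UL	/* stands in for P2M_ENTRIES_PER_PAGE */
#define TOP_SLOTS 64UL		/* stands in for TOP_ENTRIES */
#define MISSING (~0UL)

/* One shared leaf full of "missing" marks backs every hole in the map. */
static unsigned long missing_leaf[ENTRIES_PER_LEAF];
static unsigned long *top[TOP_SLOTS];

static void map_init(void)
{
	unsigned long i;

	for (i = 0; i < ENTRIES_PER_LEAF; i++)
		missing_leaf[i] = MISSING;
	for (i = 0; i < TOP_SLOTS; i++)
		top[i] = missing_leaf;
}

static unsigned long lookup(unsigned long pfn)
{
	return top[pfn / ENTRIES_PER_LEAF][pfn % ENTRIES_PER_LEAF];
}

static int set_entry(unsigned long pfn, unsigned long mfn)
{
	unsigned long idx = pfn / ENTRIES_PER_LEAF, i;

	/* First write into a hole replaces the shared leaf with a private one. */
	if (top[idx] == missing_leaf) {
		unsigned long *leaf = malloc(ENTRIES_PER_LEAF * sizeof(*leaf));

		if (!leaf)
			return -1;
		for (i = 0; i < ENTRIES_PER_LEAF; i++)
			leaf[i] = MISSING;
		top[idx] = leaf;
	}
	top[idx][pfn % ENTRIES_PER_LEAF] = mfn;
	return 0;
}

int main(void)
{
	map_init();
	set_entry(12345, 777);
	printf("pfn 12345 -> %lx, pfn 54321 -> %lx\n", lookup(12345), lookup(54321));
	return 0;
}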
static inline unsigned p2m_top_index(unsigned long pfn)
|
||||
{
|
||||
|
@ -181,15 +186,16 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
|
|||
p2m_top[topidx][idx] = mfn;
|
||||
}
|
||||
|
||||
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
|
||||
xmaddr_t arbitrary_virt_to_machine(void *vaddr)
|
||||
{
|
||||
unsigned long address = (unsigned long)vaddr;
|
||||
unsigned int level;
|
||||
pte_t *pte = lookup_address(address, &level);
|
||||
unsigned offset = address & ~PAGE_MASK;
|
||||
|
||||
BUG_ON(pte == NULL);
|
||||
|
||||
return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
|
||||
return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
|
||||
}
|
||||
|
||||
void make_lowmem_page_readonly(void *vaddr)
|
||||
|
@ -256,7 +262,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
|
|||
|
||||
xen_mc_batch();
|
||||
|
||||
u.ptr = virt_to_machine(ptr).maddr;
|
||||
/* ptr may be ioremapped for 64-bit pagetable setup */
|
||||
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
|
||||
u.val = pmd_val_ma(val);
|
||||
extend_mmu_update(&u);
|
||||
|
||||
|
@ -283,35 +290,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
|
|||
*/
|
||||
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pgd = swapper_pg_dir + pgd_index(vaddr);
|
||||
if (pgd_none(*pgd)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
if (pud_none(*pud)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pmd = pmd_offset(pud, vaddr);
|
||||
if (pmd_none(*pmd)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
/* <mfn,flags> stored as-is, to permit clearing entries */
|
||||
xen_set_pte(pte, mfn_pte(mfn, flags));
|
||||
|
||||
/*
|
||||
* It's enough to flush this one mapping.
|
||||
* (PGE mappings get flushed as well)
|
||||
*/
|
||||
__flush_tlb_one(vaddr);
|
||||
set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
|
||||
}
|
||||
|
||||
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
|
@ -418,7 +397,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
|
|||
|
||||
xen_mc_batch();
|
||||
|
||||
u.ptr = virt_to_machine(ptr).maddr;
|
||||
/* ptr may be ioremapped for 64-bit pagetable setup */
|
||||
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
|
||||
u.val = pud_val_ma(val);
|
||||
extend_mmu_update(&u);
|
||||
|
||||
|
@ -441,14 +421,19 @@ void xen_set_pud(pud_t *ptr, pud_t val)
|
|||
|
||||
void xen_set_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
#ifdef CONFIG_X86_PAE
|
||||
ptep->pte_high = pte.pte_high;
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
#else
|
||||
*ptep = pte;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
set_64bit((u64 *)ptep, pte_val_ma(pte));
|
||||
set_64bit((u64 *)ptep, native_pte_val(pte));
|
||||
}
|
||||
|
||||
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
|
@ -462,6 +447,7 @@ void xen_pmd_clear(pmd_t *pmdp)
|
|||
{
|
||||
set_pmd(pmdp, __pmd(0));
|
||||
}
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
|
||||
pmd_t xen_make_pmd(pmdval_t pmd)
|
||||
{
|
||||
|
@ -469,78 +455,189 @@ pmd_t xen_make_pmd(pmdval_t pmd)
|
|||
return native_make_pmd(pmd);
|
||||
}
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pudval_t xen_pud_val(pud_t pud)
|
||||
{
|
||||
return pte_mfn_to_pfn(pud.pud);
|
||||
}
|
||||
|
||||
pud_t xen_make_pud(pudval_t pud)
|
||||
{
|
||||
pud = pte_pfn_to_mfn(pud);
|
||||
|
||||
return native_make_pud(pud);
|
||||
}
|
||||
|
||||
pgd_t *xen_get_user_pgd(pgd_t *pgd)
|
||||
{
|
||||
pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
|
||||
unsigned offset = pgd - pgd_page;
|
||||
pgd_t *user_ptr = NULL;
|
||||
|
||||
if (offset < pgd_index(USER_LIMIT)) {
|
||||
struct page *page = virt_to_page(pgd_page);
|
||||
user_ptr = (pgd_t *)page->private;
|
||||
if (user_ptr)
|
||||
user_ptr += offset;
|
||||
}
|
||||
|
||||
return user_ptr;
|
||||
}
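Not shown in these hunks, but implied by this helper and by the pin/unpin paths further down: on 64-bit Xen the kernel pgd page carries a companion user-mode pgd whose address is kept in the page's private field, and only slots below USER_LIMIT can have a user counterpart, which is why the offset check is done before dereferencing page->private.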
static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
|
||||
{
|
||||
struct mmu_update u;
|
||||
|
||||
u.ptr = virt_to_machine(ptr).maddr;
|
||||
u.val = pgd_val_ma(val);
|
||||
extend_mmu_update(&u);
|
||||
}
|
||||
|
||||
/*
|
||||
(Yet another) pagetable walker. This one is intended for pinning a
|
||||
pagetable. This means that it walks a pagetable and calls the
|
||||
callback function on each page it finds making up the page table,
|
||||
at every level. It walks the entire pagetable, but it only bothers
|
||||
pinning pte pages which are below pte_limit. In the normal case
|
||||
this will be TASK_SIZE, but at boot we need to pin up to
|
||||
FIXADDR_TOP. But the important bit is that we don't pin beyond
|
||||
there, because then we start getting into Xen's ptes.
|
||||
*/
|
||||
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
|
||||
* Raw hypercall-based set_pgd, intended for in early boot before
|
||||
* there's a page structure. This implies:
|
||||
* 1. The only existing pagetable is the kernel's
|
||||
* 2. It is always pinned
|
||||
* 3. It has no user pagetable attached to it
|
||||
*/
|
||||
void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
xen_mc_batch();
|
||||
|
||||
__xen_set_pgd_hyper(ptr, val);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void xen_set_pgd(pgd_t *ptr, pgd_t val)
|
||||
{
|
||||
pgd_t *user_ptr = xen_get_user_pgd(ptr);
|
||||
|
||||
/* If page is not pinned, we can just update the entry
|
||||
directly */
|
||||
if (!page_pinned(ptr)) {
|
||||
*ptr = val;
|
||||
if (user_ptr) {
|
||||
WARN_ON(page_pinned(user_ptr));
|
||||
*user_ptr = val;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* If it's pinned, then we can at least batch the kernel and
|
||||
user updates together. */
|
||||
xen_mc_batch();
|
||||
|
||||
__xen_set_pgd_hyper(ptr, val);
|
||||
if (user_ptr)
|
||||
__xen_set_pgd_hyper(user_ptr, val);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
}
|
||||
#endif /* PAGETABLE_LEVELS == 4 */
|
||||
|
||||
/*
|
||||
* (Yet another) pagetable walker. This one is intended for pinning a
|
||||
* pagetable. This means that it walks a pagetable and calls the
|
||||
* callback function on each page it finds making up the page table,
|
||||
* at every level. It walks the entire pagetable, but it only bothers
|
||||
* pinning pte pages which are below limit. In the normal case this
|
||||
* will be STACK_TOP_MAX, but at boot we need to pin up to
|
||||
* FIXADDR_TOP.
|
||||
*
|
||||
* For 32-bit the important bit is that we don't pin beyond there,
|
||||
* because then we start getting into Xen's ptes.
|
||||
*
|
||||
* For 64-bit, we must skip the Xen hole in the middle of the address
|
||||
* space, just after the big x86-64 virtual hole.
|
||||
*/
|
||||
static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
|
||||
unsigned long limit)
|
||||
{
|
||||
pgd_t *pgd = pgd_base;
|
||||
int flush = 0;
|
||||
unsigned long addr = 0;
|
||||
unsigned long pgd_next;
|
||||
unsigned hole_low, hole_high;
|
||||
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
|
||||
unsigned pgdidx, pudidx, pmdidx;
|
||||
|
||||
BUG_ON(limit > FIXADDR_TOP);
|
||||
/* The limit is the last byte to be touched */
|
||||
limit--;
|
||||
BUG_ON(limit >= FIXADDR_TOP);
|
||||
|
||||
if (xen_feature(XENFEAT_auto_translated_physmap))
|
||||
return 0;
|
||||
|
||||
for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
|
||||
/*
|
||||
* 64-bit has a great big hole in the middle of the address
|
||||
* space, which contains the Xen mappings. On 32-bit these
|
||||
* will end up making a zero-sized hole and so is a no-op.
|
||||
*/
|
||||
hole_low = pgd_index(USER_LIMIT);
|
||||
hole_high = pgd_index(PAGE_OFFSET);
|
||||
|
||||
pgdidx_limit = pgd_index(limit);
|
||||
#if PTRS_PER_PUD > 1
|
||||
pudidx_limit = pud_index(limit);
|
||||
#else
|
||||
pudidx_limit = 0;
|
||||
#endif
|
||||
#if PTRS_PER_PMD > 1
|
||||
pmdidx_limit = pmd_index(limit);
|
||||
#else
|
||||
pmdidx_limit = 0;
|
||||
#endif
|
||||
|
||||
flush |= (*func)(virt_to_page(pgd), PT_PGD);
|
||||
|
||||
for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
|
||||
pud_t *pud;
|
||||
unsigned long pud_limit, pud_next;
|
||||
|
||||
pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
|
||||
|
||||
if (!pgd_val(*pgd))
|
||||
if (pgdidx >= hole_low && pgdidx < hole_high)
|
||||
continue;
|
||||
|
||||
pud = pud_offset(pgd, 0);
|
||||
if (!pgd_val(pgd[pgdidx]))
|
||||
continue;
|
||||
|
||||
pud = pud_offset(&pgd[pgdidx], 0);
|
||||
|
||||
if (PTRS_PER_PUD > 1) /* not folded */
|
||||
flush |= (*func)(virt_to_page(pud), PT_PUD);
|
||||
|
||||
for (; addr != pud_limit; pud++, addr = pud_next) {
|
||||
for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
|
||||
pmd_t *pmd;
|
||||
unsigned long pmd_limit;
|
||||
|
||||
pud_next = pud_addr_end(addr, pud_limit);
|
||||
if (pgdidx == pgdidx_limit &&
|
||||
pudidx > pudidx_limit)
|
||||
goto out;
|
||||
|
||||
if (pud_next < limit)
|
||||
pmd_limit = pud_next;
|
||||
else
|
||||
pmd_limit = limit;
|
||||
|
||||
if (pud_none(*pud))
|
||||
if (pud_none(pud[pudidx]))
|
||||
continue;
|
||||
|
||||
pmd = pmd_offset(pud, 0);
|
||||
pmd = pmd_offset(&pud[pudidx], 0);
|
||||
|
||||
if (PTRS_PER_PMD > 1) /* not folded */
|
||||
flush |= (*func)(virt_to_page(pmd), PT_PMD);
|
||||
|
||||
for (; addr != pmd_limit; pmd++) {
|
||||
addr += (PAGE_SIZE * PTRS_PER_PTE);
|
||||
if ((pmd_limit-1) < (addr-1)) {
|
||||
addr = pmd_limit;
|
||||
break;
|
||||
}
|
||||
for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
|
||||
struct page *pte;
|
||||
|
||||
if (pmd_none(*pmd))
|
||||
if (pgdidx == pgdidx_limit &&
|
||||
pudidx == pudidx_limit &&
|
||||
pmdidx > pmdidx_limit)
|
||||
goto out;
|
||||
|
||||
if (pmd_none(pmd[pmdidx]))
|
||||
continue;
|
||||
|
||||
flush |= (*func)(pmd_page(*pmd), PT_PTE);
|
||||
pte = pmd_page(pmd[pmdidx]);
|
||||
flush |= (*func)(pte, PT_PTE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
|
||||
out:
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
@ -622,14 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
|
|||
{
|
||||
xen_mc_batch();
|
||||
|
||||
if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
|
||||
if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
|
||||
/* re-enable interrupts for kmap_flush_unused */
|
||||
xen_mc_issue(0);
|
||||
kmap_flush_unused();
|
||||
xen_mc_batch();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
pgd_t *user_pgd = xen_get_user_pgd(pgd);
|
||||
|
||||
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
if (user_pgd) {
|
||||
pin_page(virt_to_page(user_pgd), PT_PGD);
|
||||
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
|
||||
}
|
||||
}
|
||||
#else /* CONFIG_X86_32 */
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/* Need to make sure unshared kernel PMD is pinnable */
|
||||
pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
|
||||
#endif
|
||||
xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
#endif /* CONFIG_X86_64 */
|
||||
xen_mc_issue(0);
|
||||
}
|
||||
|
||||
|
@ -656,9 +770,11 @@ void xen_mm_pin_all(void)
|
|||
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||
}
|
||||
|
||||
/* The init_mm pagetable is really pinned as soon as its created, but
|
||||
that's before we have page structures to store the bits. So do all
|
||||
the book-keeping now. */
|
||||
/*
|
||||
* The init_mm pagetable is really pinned as soon as its created, but
|
||||
* that's before we have page structures to store the bits. So do all
|
||||
* the book-keeping now.
|
||||
*/
|
||||
static __init int mark_pinned(struct page *page, enum pt_level level)
|
||||
{
|
||||
SetPagePinned(page);
|
||||
|
@ -708,7 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
|
|||
|
||||
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
pgd_walk(pgd, unpin_page, TASK_SIZE);
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
pgd_t *user_pgd = xen_get_user_pgd(pgd);
|
||||
|
||||
if (user_pgd) {
|
||||
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
|
||||
unpin_page(virt_to_page(user_pgd), PT_PGD);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/* Need to make sure unshared kernel PMD is unpinned */
|
||||
pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
|
||||
#endif
|
||||
|
||||
pgd_walk(pgd, unpin_page, USER_LIMIT);
|
||||
|
||||
xen_mc_issue(0);
|
||||
}
|
||||
|
@ -727,7 +859,6 @@ void xen_mm_unpin_all(void)
|
|||
list_for_each_entry(page, &pgd_list, lru) {
|
||||
if (PageSavePinned(page)) {
|
||||
BUG_ON(!PagePinned(page));
|
||||
printk("unpinning pinned %p\n", page_address(page));
|
||||
xen_pgd_unpin((pgd_t *)page_address(page));
|
||||
ClearPageSavePinned(page);
|
||||
}
|
||||
|
@ -757,8 +888,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
|||
static void drop_other_mm_ref(void *info)
|
||||
{
|
||||
struct mm_struct *mm = info;
|
||||
struct mm_struct *active_mm;
|
||||
|
||||
if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
|
||||
#ifdef CONFIG_X86_64
|
||||
active_mm = read_pda(active_mm);
|
||||
#else
|
||||
active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
|
||||
#endif
|
||||
|
||||
if (active_mm == mm)
|
||||
leave_mm(smp_processor_id());
|
||||
|
||||
/* If this cpu still has a stale cr3 reference, then make sure
|
||||
|
|
|
@ -10,18 +10,6 @@ enum pt_level {
|
|||
PT_PTE
|
||||
};
|
||||
|
||||
/*
|
||||
* Page-directory addresses above 4GB do not fit into architectural %cr3.
|
||||
* When accessing %cr3, or equivalent field in vcpu_guest_context, guests
|
||||
* must use the following accessor macros to pack/unpack valid MFNs.
|
||||
*
|
||||
* Note that Xen is using the fact that the pagetable base is always
|
||||
* page-aligned, and putting the 12 MSB of the address into the 12 LSB
|
||||
* of cr3.
|
||||
*/
|
||||
#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
|
||||
#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
|
||||
|
||||
|
||||
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
|
||||
|
||||
|
@ -44,13 +32,26 @@ pgd_t xen_make_pgd(pgdval_t);
|
|||
void xen_set_pte(pte_t *ptep, pte_t pteval);
|
||||
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval);
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
|
||||
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void xen_pmd_clear(pmd_t *pmdp);
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
|
||||
void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
|
||||
void xen_set_pud(pud_t *ptr, pud_t val);
|
||||
void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
|
||||
void xen_set_pud_hyper(pud_t *ptr, pud_t val);
|
||||
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void xen_pmd_clear(pmd_t *pmdp);
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pudval_t xen_pud_val(pud_t pud);
|
||||
pud_t xen_make_pud(pudval_t pudval);
|
||||
void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
|
||||
void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
|
||||
#endif
|
||||
|
||||
pgd_t *xen_get_user_pgd(pgd_t *pgd);
|
||||
|
||||
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
|
||||
|
|
|
@ -76,6 +76,7 @@ void xen_mc_flush(void)
|
|||
if (ret) {
|
||||
printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
|
||||
ret, smp_processor_id());
|
||||
dump_stack();
|
||||
for (i = 0; i < b->mcidx; i++) {
|
||||
printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
|
||||
i+1, b->mcidx,
|
||||
|
|
|
@ -83,30 +83,72 @@ static void xen_idle(void)
|
|||
|
||||
/*
|
||||
* Set the bit indicating "nosegneg" library variants should be used.
|
||||
* We only need to bother in pure 32-bit mode; compat 32-bit processes
|
||||
* can have un-truncated segments, so wrapping around is allowed.
|
||||
*/
|
||||
static void __init fiddle_vdso(void)
|
||||
{
|
||||
extern const char vdso32_default_start;
|
||||
u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
|
||||
#ifdef CONFIG_X86_32
|
||||
u32 *mask;
|
||||
mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
|
||||
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
|
||||
mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
|
||||
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
|
||||
#endif
|
||||
}
|
||||
|
||||
void xen_enable_sysenter(void)
|
||||
static __cpuinit int register_callback(unsigned type, const void *func)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
extern void xen_sysenter_target(void);
|
||||
/* Mask events on entry, even though they get enabled immediately */
|
||||
static struct callback_register sysenter = {
|
||||
.type = CALLBACKTYPE_sysenter,
|
||||
.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
|
||||
struct callback_register callback = {
|
||||
.type = type,
|
||||
.address = XEN_CALLBACK(__KERNEL_CS, func),
|
||||
.flags = CALLBACKF_mask_events,
|
||||
};
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_SEP) ||
|
||||
HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
|
||||
clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
|
||||
return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
|
||||
}
|
||||
|
||||
void __cpuinit xen_enable_sysenter(void)
|
||||
{
|
||||
extern void xen_sysenter_target(void);
|
||||
int ret;
|
||||
unsigned sysenter_feature;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
sysenter_feature = X86_FEATURE_SEP;
|
||||
#else
|
||||
sysenter_feature = X86_FEATURE_SYSENTER32;
|
||||
#endif
|
||||
|
||||
if (!boot_cpu_has(sysenter_feature))
|
||||
return;
|
||||
|
||||
ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
|
||||
if (ret != 0)
|
||||
setup_clear_cpu_cap(sysenter_feature);
|
||||
}
|
||||
|
||||
void __cpuinit xen_enable_syscall(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
int ret;
|
||||
extern void xen_syscall_target(void);
|
||||
extern void xen_syscall32_target(void);
|
||||
|
||||
ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
|
||||
if (ret != 0) {
|
||||
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
|
||||
/* Pretty fatal; 64-bit userspace has no other
|
||||
mechanism for syscalls. */
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
|
||||
ret = register_callback(CALLBACKTYPE_syscall32,
|
||||
xen_syscall32_target);
|
||||
if (ret != 0)
|
||||
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
}
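The asymmetry here follows from the comments above: if registering the 64-bit syscall callback fails the code can only log it, since 64-bit userspace has no other way into the kernel, whereas sysenter and 32-bit syscall are optional fast paths whose feature bits are simply cleared on failure so that the compat vDSO selection seen earlier falls back to int80.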
void __init xen_arch_setup(void)
|
||||
|
@ -120,10 +162,12 @@ void __init xen_arch_setup(void)
|
|||
if (!xen_feature(XENFEAT_auto_translated_physmap))
|
||||
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
|
||||
|
||||
HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
|
||||
__KERNEL_CS, (unsigned long)xen_failsafe_callback);
|
||||
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
|
||||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
|
||||
BUG();
|
||||
|
||||
xen_enable_sysenter();
|
||||
xen_enable_syscall();
|
||||
|
||||
set_iopl.iopl = 1;
|
||||
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
|
||||
|
@ -143,11 +187,6 @@ void __init xen_arch_setup(void)
|
|||
|
||||
pm_idle = xen_idle;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* fill cpus_possible with all available cpus */
|
||||
xen_fill_possible_map();
|
||||
#endif
|
||||
|
||||
paravirt_disable_iospace();
|
||||
|
||||
fiddle_vdso();
|
||||
|
|
|
@ -66,13 +66,22 @@ static __cpuinit void cpu_bringup_and_idle(void)
|
|||
int cpu = smp_processor_id();
|
||||
|
||||
cpu_init();
|
||||
xen_enable_sysenter();
|
||||
|
||||
preempt_disable();
|
||||
per_cpu(cpu_state, cpu) = CPU_ONLINE;
|
||||
|
||||
xen_enable_sysenter();
|
||||
xen_enable_syscall();
|
||||
|
||||
cpu = smp_processor_id();
|
||||
smp_store_cpu_info(cpu);
|
||||
cpu_data(cpu).x86_max_cores = 1;
|
||||
set_cpu_sibling_map(cpu);
|
||||
|
||||
xen_setup_cpu_clockevents();
|
||||
|
||||
cpu_set(cpu, cpu_online_map);
|
||||
x86_write_percpu(cpu_state, CPU_ONLINE);
|
||||
wmb();
|
||||
|
||||
/* We can take interrupts now: we're officially "up". */
|
||||
local_irq_enable();
|
||||
|
||||
|
@ -141,56 +150,37 @@ static int xen_smp_intr_init(unsigned int cpu)
|
|||
return rc;
|
||||
}
|
||||
|
||||
void __init xen_fill_possible_map(void)
|
||||
static void __init xen_fill_possible_map(void)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
for (i = 0; i < NR_CPUS; i++) {
|
||||
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
|
||||
if (rc >= 0)
|
||||
if (rc >= 0) {
|
||||
num_processors++;
|
||||
cpu_set(i, cpu_possible_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void __init xen_smp_prepare_boot_cpu(void)
|
||||
static void __init xen_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
BUG_ON(smp_processor_id() != 0);
|
||||
native_smp_prepare_boot_cpu();
|
||||
|
||||
/* We've switched to the "real" per-cpu gdt, so make sure the
|
||||
old memory can be recycled */
|
||||
make_lowmem_page_readwrite(&per_cpu__gdt_page);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpus_clear(per_cpu(cpu_sibling_map, cpu));
|
||||
/*
|
||||
* cpu_core_map lives in a per cpu area that is cleared
|
||||
* when the per cpu array is allocated.
|
||||
*
|
||||
* cpus_clear(per_cpu(cpu_core_map, cpu));
|
||||
*/
|
||||
}
|
||||
make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
|
||||
|
||||
xen_setup_vcpu_info_placement();
|
||||
}
|
||||
|
||||
void __init xen_smp_prepare_cpus(unsigned int max_cpus)
|
||||
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
|
||||
{
|
||||
unsigned cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpus_clear(per_cpu(cpu_sibling_map, cpu));
|
||||
/*
|
||||
* cpu_core_map will be zeroed when the per
|
||||
* cpu area is allocated.
|
||||
*
|
||||
* cpus_clear(per_cpu(cpu_core_map, cpu));
|
||||
*/
|
||||
}
|
||||
|
||||
smp_store_cpu_info(0);
|
||||
cpu_data(0).x86_max_cores = 1;
|
||||
set_cpu_sibling_map(0);
|
||||
|
||||
if (xen_smp_intr_init(0))
|
||||
|
@ -225,7 +215,7 @@ static __cpuinit int
|
|||
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
struct vcpu_guest_context *ctxt;
|
||||
struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
|
||||
struct desc_struct *gdt;
|
||||
|
||||
if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
|
||||
return 0;
|
||||
|
@ -234,12 +224,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
if (ctxt == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
|
||||
ctxt->flags = VGCF_IN_KERNEL;
|
||||
ctxt->user_regs.ds = __USER_DS;
|
||||
ctxt->user_regs.es = __USER_DS;
|
||||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
ctxt->user_regs.gs = 0;
|
||||
ctxt->user_regs.ss = __KERNEL_DS;
|
||||
#ifdef CONFIG_X86_32
|
||||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
#endif
|
||||
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
|
||||
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
|
||||
|
||||
|
@ -249,11 +242,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
|
||||
ctxt->ldt_ents = 0;
|
||||
|
||||
BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
|
||||
make_lowmem_page_readonly(gdt->gdt);
|
||||
BUG_ON((unsigned long)gdt & ~PAGE_MASK);
|
||||
make_lowmem_page_readonly(gdt);
|
||||
|
||||
ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
|
||||
ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
|
||||
ctxt->gdt_frames[0] = virt_to_mfn(gdt);
|
||||
ctxt->gdt_ents = GDT_ENTRIES;
|
||||
|
||||
ctxt->user_regs.cs = __KERNEL_CS;
|
||||
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
|
||||
|
@ -261,9 +254,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
ctxt->kernel_ss = __KERNEL_DS;
|
||||
ctxt->kernel_sp = idle->thread.sp0;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
ctxt->event_callback_cs = __KERNEL_CS;
|
||||
ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
|
||||
ctxt->failsafe_callback_cs = __KERNEL_CS;
|
||||
#endif
|
||||
ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
|
||||
ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
|
||||
|
||||
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
|
||||
|
@ -276,7 +271,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int __cpuinit xen_cpu_up(unsigned int cpu)
|
||||
static int __cpuinit xen_cpu_up(unsigned int cpu)
|
||||
{
|
||||
struct task_struct *idle = idle_task(cpu);
|
||||
int rc;
|
||||
|
@ -287,11 +282,28 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
|
|||
return rc;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Allocate node local memory for AP pdas */
|
||||
WARN_ON(cpu == 0);
|
||||
if (cpu > 0) {
|
||||
rc = get_local_pda(cpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
init_gdt(cpu);
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
irq_ctx_init(cpu);
|
||||
#else
|
||||
cpu_pda(cpu)->pcurrent = idle;
|
||||
clear_tsk_thread_flag(idle, TIF_FORK);
|
||||
#endif
|
||||
xen_setup_timer(cpu);
|
||||
|
||||
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
|
||||
|
||||
/* make sure interrupts start blocked */
|
||||
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
|
||||
|
||||
|
@ -306,20 +318,18 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
smp_store_cpu_info(cpu);
|
||||
set_cpu_sibling_map(cpu);
|
||||
/* This must be done before setting cpu_online_map */
|
||||
wmb();
|
||||
|
||||
cpu_set(cpu, cpu_online_map);
|
||||
|
||||
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
|
||||
BUG_ON(rc);
|
||||
|
||||
while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
|
||||
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
|
||||
barrier();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void xen_smp_cpus_done(unsigned int max_cpus)
|
||||
static void xen_smp_cpus_done(unsigned int max_cpus)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -335,12 +345,12 @@ static void stop_self(void *v)
|
|||
BUG();
|
||||
}
|
||||
|
||||
void xen_smp_send_stop(void)
|
||||
static void xen_smp_send_stop(void)
|
||||
{
|
||||
smp_call_function(stop_self, NULL, 0);
|
||||
}
|
||||
|
||||
void xen_smp_send_reschedule(int cpu)
|
||||
static void xen_smp_send_reschedule(int cpu)
|
||||
{
|
||||
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
|
||||
}
|
||||
|
@ -355,7 +365,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
|
|||
xen_send_IPI_one(cpu, vector);
|
||||
}
|
||||
|
||||
void xen_smp_send_call_function_ipi(cpumask_t mask)
|
||||
static void xen_smp_send_call_function_ipi(cpumask_t mask)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
|
@ -370,7 +380,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask)
|
|||
}
|
||||
}
|
||||
|
||||
void xen_smp_send_call_function_single_ipi(int cpu)
|
||||
static void xen_smp_send_call_function_single_ipi(int cpu)
|
||||
{
|
||||
xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
|
||||
}
|
||||
|
@ -379,7 +389,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
|
|||
{
|
||||
irq_enter();
|
||||
generic_smp_call_function_interrupt();
|
||||
#ifdef CONFIG_X86_32
|
||||
__get_cpu_var(irq_stat).irq_call_count++;
|
||||
#else
|
||||
add_pda(irq_call_count, 1);
|
||||
#endif
|
||||
irq_exit();
|
||||
|
||||
return IRQ_HANDLED;
|
||||
|
@ -389,8 +403,31 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
|
|||
{
|
||||
irq_enter();
|
||||
generic_smp_call_function_single_interrupt();
|
||||
#ifdef CONFIG_X86_32
|
||||
__get_cpu_var(irq_stat).irq_call_count++;
|
||||
#else
|
||||
add_pda(irq_call_count, 1);
|
||||
#endif
|
||||
irq_exit();
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static const struct smp_ops xen_smp_ops __initdata = {
|
||||
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
|
||||
.smp_prepare_cpus = xen_smp_prepare_cpus,
|
||||
.cpu_up = xen_cpu_up,
|
||||
.smp_cpus_done = xen_smp_cpus_done,
|
||||
|
||||
.smp_send_stop = xen_smp_send_stop,
|
||||
.smp_send_reschedule = xen_smp_send_reschedule,
|
||||
|
||||
.send_call_func_ipi = xen_smp_send_call_function_ipi,
|
||||
.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
|
||||
};
|
||||
|
||||
void __init xen_smp_init(void)
|
||||
{
|
||||
smp_ops = xen_smp_ops;
|
||||
xen_fill_possible_map();
|
||||
}
|
||||
|
|
|
@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled)
|
|||
xen_cpu_initialized_map = cpu_online_map;
|
||||
#endif
|
||||
xen_vcpu_restore();
|
||||
xen_timer_resume();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void xen_arch_resume(void)
|
||||
{
|
||||
/* nothing */
|
||||
}
|
||||
|
|
|
@ -0,0 +1,271 @@
|
|||
/*
|
||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
||||
The inline versions are the same as the direct-use versions, with the
|
||||
pre- and post-amble chopped off.
|
||||
|
||||
This code is encoded for size rather than absolute efficiency,
|
||||
with a view to being able to inline as much as possible.
|
||||
|
||||
We only bother with direct forms (ie, vcpu in pda) of the operations
|
||||
here; the indirect forms are better handled in C, since they're
|
||||
generally too large to inline anyway.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/segment.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
|
||||
#define ENDPATCH(x) .globl x##_end; x##_end=.
|
||||
|
||||
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
|
||||
#define XEN_EFLAGS_NMI 0x80000000
|
||||
|
||||
#if 0
|
||||
#include <asm/percpu.h>
|
||||
|
||||
/*
|
||||
Enable events. This clears the event mask and tests the pending
|
||||
event status with one and operation. If there are pending
|
||||
events, then enter the hypervisor to get them handled.
|
||||
*/
|
||||
ENTRY(xen_irq_enable_direct)
|
||||
/* Unmask events */
|
||||
movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
|
||||
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* Test for pending */
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
|
||||
jz 1f
|
||||
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_irq_enable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_enable_direct)
|
||||
RELOC(xen_irq_enable_direct, 2b+1)
|
||||
|
||||
/*
|
||||
Disabling events is simply a matter of making the event mask
|
||||
non-zero.
|
||||
*/
|
||||
ENTRY(xen_irq_disable_direct)
|
||||
movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
|
||||
ENDPATCH(xen_irq_disable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_disable_direct)
|
||||
RELOC(xen_irq_disable_direct, 0)
|
||||
|
||||
/*
|
||||
(xen_)save_fl is used to get the current interrupt enable status.
|
||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||
may be set in the return value. We take advantage of this by
|
||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||
in that byte are 0), but other bits in the return value are
|
||||
undefined. We need to toggle the state of the bit, because
|
||||
Xen and x86 use opposite senses (mask vs enable).
|
||||
*/
|
||||
ENTRY(xen_save_fl_direct)
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
|
||||
setz %ah
|
||||
addb %ah,%ah
|
||||
ENDPATCH(xen_save_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_save_fl_direct)
|
||||
RELOC(xen_save_fl_direct, 0)
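Concretely: when the per-vcpu event mask byte is zero (events enabled), the testb sets ZF, setz makes %ah 1, and the addb doubles it to 2, leaving 0x200 (X86_EFLAGS_IF) in the byte the caller tests; when events are masked, %ah ends up 0. That is the inversion between Xen's mask sense and the x86 IF sense that the comment above describes.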
/*
|
||||
In principle the caller should be passing us a value return
|
||||
from xen_save_fl_direct, but for robustness sake we test only
|
||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
||||
setting the interrupt mask state, it checks for unmasked
|
||||
pending events and enters the hypervisor to get them delivered
|
||||
if so.
|
||||
*/
|
||||
ENTRY(xen_restore_fl_direct)
|
||||
testb $X86_EFLAGS_IF>>8, %ah
|
||||
setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* check for unmasked and pending */
|
||||
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
|
||||
jz 1f
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_restore_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_restore_fl_direct)
|
||||
RELOC(xen_restore_fl_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
check_events:
|
||||
push %rax
|
||||
push %rcx
|
||||
push %rdx
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %r8
|
||||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
call force_evtchn_callback
|
||||
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
pop %rdx
|
||||
pop %rcx
|
||||
pop %rax
|
||||
ret
|
||||
#endif
|
||||
|
||||
ENTRY(xen_adjust_exception_frame)
|
||||
mov 8+0(%rsp),%rcx
|
||||
mov 8+8(%rsp),%r11
|
||||
ret $16
|
||||
|
||||
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
|
||||
/*
|
||||
Xen64 iret frame:
|
||||
|
||||
ss
|
||||
rsp
|
||||
rflags
|
||||
cs
|
||||
rip <-- standard iret frame
|
||||
|
||||
flags
|
||||
|
||||
rcx }
|
||||
r11 }<-- pushed by hypercall page
|
||||
rsp -> rax }
|
||||
*/
|
||||
ENTRY(xen_iret)
|
||||
pushq $0
|
||||
1: jmp hypercall_iret
|
||||
ENDPATCH(xen_iret)
|
||||
RELOC(xen_iret, 1b+1)
|
||||
|
||||
/*
|
||||
sysexit is not used for 64-bit processes, so it's
|
||||
only ever used to return to 32-bit compat userspace.
|
||||
*/
|
||||
ENTRY(xen_sysexit)
|
||||
pushq $__USER32_DS
|
||||
pushq %rcx
|
||||
pushq $X86_EFLAGS_IF
|
||||
pushq $__USER32_CS
|
||||
pushq %rdx
|
||||
|
||||
pushq $VGCF_in_syscall
|
||||
1: jmp hypercall_iret
|
||||
ENDPATCH(xen_sysexit)
|
||||
RELOC(xen_sysexit, 1b+1)
|
||||
|
||||
ENTRY(xen_sysret64)
|
||||
/* We're already on the usermode stack at this point, but still
|
||||
with the kernel gs, so we can easily switch back */
|
||||
movq %rsp, %gs:pda_oldrsp
|
||||
movq %gs:pda_kernelstack,%rsp
|
||||
|
||||
pushq $__USER_DS
|
||||
pushq %gs:pda_oldrsp
|
||||
pushq %r11
|
||||
pushq $__USER_CS
|
||||
pushq %rcx
|
||||
|
||||
pushq $VGCF_in_syscall
|
||||
1: jmp hypercall_iret
|
||||
ENDPATCH(xen_sysret64)
|
||||
RELOC(xen_sysret64, 1b+1)
|
||||
|
||||
ENTRY(xen_sysret32)
|
||||
/* We're already on the usermode stack at this point, but still
|
||||
with the kernel gs, so we can easily switch back */
|
||||
movq %rsp, %gs:pda_oldrsp
|
||||
movq %gs:pda_kernelstack, %rsp
|
||||
|
||||
pushq $__USER32_DS
|
||||
pushq %gs:pda_oldrsp
|
||||
pushq %r11
|
||||
pushq $__USER32_CS
|
||||
pushq %rcx
|
||||
|
||||
pushq $VGCF_in_syscall
|
||||
1: jmp hypercall_iret
|
||||
ENDPATCH(xen_sysret32)
|
||||
RELOC(xen_sysret32, 1b+1)
|
||||
|
||||
/*
|
||||
Xen handles syscall callbacks much like ordinary exceptions,
|
||||
which means we have:
|
||||
- kernel gs
|
||||
- kernel rsp
|
||||
- an iret-like stack frame on the stack (including rcx and r11):
|
||||
ss
|
||||
rsp
|
||||
rflags
|
||||
cs
|
||||
rip
|
||||
r11
|
||||
rsp-> rcx
|
||||
|
||||
In all the entrypoints, we undo all that to make it look
|
||||
like a CPU-generated syscall/sysenter and jump to the normal
|
||||
entrypoint.
|
||||
*/
|
||||
|
||||
.macro undo_xen_syscall
|
||||
mov 0*8(%rsp),%rcx
|
||||
mov 1*8(%rsp),%r11
|
||||
mov 5*8(%rsp),%rsp
|
||||
.endm
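Given the frame layout in the comment above (rcx at the top of the stack, then r11, rip, cs, rflags, rsp, ss), the macro reloads %rcx and %r11 from slots 0 and 1 and then switches to the user stack pointer saved in slot 5, which lines the registers up with what the native entry points expect, as the comment preceding this block says.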
/* Normal 64-bit system call target */
|
||||
ENTRY(xen_syscall_target)
|
||||
undo_xen_syscall
|
||||
jmp system_call_after_swapgs
|
||||
ENDPROC(xen_syscall_target)
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
||||
/* 32-bit compat syscall target */
|
||||
ENTRY(xen_syscall32_target)
|
||||
undo_xen_syscall
|
||||
jmp ia32_cstar_target
|
||||
ENDPROC(xen_syscall32_target)
|
||||
|
||||
/* 32-bit compat sysenter target */
|
||||
ENTRY(xen_sysenter_target)
|
||||
undo_xen_syscall
|
||||
jmp ia32_sysenter_target
|
||||
ENDPROC(xen_sysenter_target)
|
||||
|
||||
#else /* !CONFIG_IA32_EMULATION */
|
||||
|
||||
ENTRY(xen_syscall32_target)
|
||||
ENTRY(xen_sysenter_target)
|
||||
lea 16(%rsp), %rsp /* strip %rcx,%r11 */
|
||||
mov $-ENOSYS, %rax
|
||||
pushq $VGCF_in_syscall
|
||||
jmp hypercall_iret
|
||||
ENDPROC(xen_syscall32_target)
|
||||
ENDPROC(xen_sysenter_target)
|
||||
|
||||
#endif /* CONFIG_IA32_EMULATION */
|
|
@ -5,15 +5,24 @@

#include <linux/elfnote.h>
#include <linux/init.h>

#include <asm/boot.h>
#include <asm/asm.h>
#include <asm/page.h>

#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>

        __INIT
ENTRY(startup_xen)
        movl %esi,xen_start_info
        cld
        movl $(init_thread_union+THREAD_SIZE),%esp
#ifdef CONFIG_X86_32
        mov %esi,xen_start_info
        mov $init_thread_union+THREAD_SIZE,%esp
#else
        mov %rsi,xen_start_info
        mov $init_thread_union+THREAD_SIZE,%rsp
#endif
        jmp xen_start_kernel

        __FINIT

@ -21,21 +30,26 @@ ENTRY(startup_xen)

.pushsection .text
        .align PAGE_SIZE_asm
ENTRY(hypercall_page)
        .skip 0x1000
        .skip PAGE_SIZE_asm
.popsection

        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
        ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET)
        ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
        ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
#ifdef CONFIG_X86_32
        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET)
#else
        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
#endif
        ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
        ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
        ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
        ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
        ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
        ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
                .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
        ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
        ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START)
        ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
        ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)

#endif /*CONFIG_XEN */

@ -26,6 +26,7 @@ char * __init xen_memory_setup(void);

void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
void xen_enable_sysenter(void);
void xen_enable_syscall(void);
void xen_vcpu_restore(void);

void __init xen_build_dynamic_phys_to_machine(void);

@ -37,7 +38,6 @@ void __init xen_time_init(void);

unsigned long xen_get_wallclock(void);
int xen_set_wallclock(unsigned long time);
unsigned long long xen_sched_clock(void);
void xen_timer_resume(void);

irqreturn_t xen_debug_interrupt(int irq, void *dev_id);

@ -45,20 +45,15 @@ bool xen_vcpu_stolen(int vcpu);

void xen_mark_init_mm_pinned(void);

void __init xen_fill_possible_map(void);

void __init xen_setup_vcpu_info_placement(void);
void xen_smp_prepare_boot_cpu(void);
void xen_smp_prepare_cpus(unsigned int max_cpus);
int xen_cpu_up(unsigned int cpu);
void xen_smp_cpus_done(unsigned int max_cpus);

void xen_smp_send_stop(void);
void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(cpumask_t mask);
void xen_smp_send_call_function_single_ipi(int cpu);
#ifdef CONFIG_SMP
void xen_smp_init(void);

extern cpumask_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
#endif

/* Declare an asm function, along with symbols needed to make it

@ -73,7 +68,11 @@ DECL_ASM(void, xen_irq_disable_direct, void);

DECL_ASM(unsigned long, xen_save_fl_direct, void);
DECL_ASM(void, xen_restore_fl_direct, unsigned long);

/* These are not functions, and cannot be called normally */
void xen_iret(void);
void xen_sysexit(void);
void xen_sysret32(void);
void xen_sysret64(void);
void xen_adjust_exception_frame(void);

#endif /* XEN_OPS_H */

@ -92,7 +92,7 @@ struct netfront_info {

 */
union skb_entry {
        struct sk_buff *skb;
        unsigned link;
        unsigned long link;
} tx_skbs[NET_TX_RING_SIZE];
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];

@ -125,6 +125,17 @@ struct netfront_rx_info {

        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

static void skb_entry_set_link(union skb_entry *list, unsigned short id)
{
        list->link = id;
}

static int skb_entry_is_link(const union skb_entry *list)
{
        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
        return ((unsigned long)list->skb < PAGE_OFFSET);
}

/*
 * Access macros for acquiring freeing slots in tx_skbs[].
 */

@ -132,7 +143,7 @@ struct netfront_rx_info {

static void add_id_to_freelist(unsigned *head, union skb_entry *list,
                               unsigned short id)
{
        list[id].link = *head;
        skb_entry_set_link(&list[id], *head);
        *head = id;
}

@ -993,7 +1004,7 @@ static void xennet_release_tx_bufs(struct netfront_info *np)

        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                /* Skip over entries which are actually freelist references */
                if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
                if (skb_entry_is_link(&np->tx_skbs[i]))
                        continue;

                skb = np->tx_skbs[i].skb;

@ -1123,7 +1134,7 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev

        /* Initialise tx_skbs as a free chain containing every entry. */
        np->tx_skb_freelist = 0;
        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                np->tx_skbs[i].link = i+1;
                skb_entry_set_link(&np->tx_skbs[i], i+1);
                np->grant_tx_ref[i] = GRANT_INVALID_REF;
        }
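(Illustrative aside, not part of the diff: a standalone sketch of the tagged-union trick the skb_entry helpers above encapsulate. A slot holds either a kernel pointer or a small free-list index, and the two are told apart by comparing against the lowest kernel virtual address; the driver uses PAGE_OFFSET, while the constant and names below are made up for the example.)

#include <stdio.h>

#define FAKE_KERNEL_BASE 0xffff880000000000UL   /* stand-in for PAGE_OFFSET */
#define RING_SIZE 4

union slot {
        void *ptr;          /* in-use entry: points at a real object */
        unsigned long link; /* free entry: index of the next free slot */
};

static int slot_is_link(const union slot *s)
{
        /* small indices can never look like kernel pointers */
        return (unsigned long)s->ptr < FAKE_KERNEL_BASE;
}

int main(void)
{
        union slot slots[RING_SIZE];
        unsigned long i;

        /* chain every slot into a free list, like the init loop above */
        for (i = 0; i < RING_SIZE; i++)
                slots[i].link = i + 1;

        slots[2].ptr = (void *)(FAKE_KERNEL_BASE + 0x1000); /* pretend allocation */

        for (i = 0; i < RING_SIZE; i++)
                printf("slot %lu: %s\n", i,
                       slot_is_link(&slots[i]) ? "free-list link" : "in use");
        return 0;
}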
@ -37,7 +37,7 @@

#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
#include <asm/cacheflush.h>
#include <asm/gart.h>
#include <asm/iommu.h>
#include "pci.h"

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)

@ -63,11 +63,12 @@ static int xen_suspend(void *data)

        gnttab_resume();
        xen_mm_unpin_all();

        device_power_up();
        device_power_up(PMSG_RESUME);

        if (!*cancelled) {
                xen_irq_resume();
                xen_console_resume();
                xen_timer_resume();
        }

        return 0;

@ -107,12 +108,13 @@ static void do_suspend(void)

                goto out;
        }

        if (!cancelled)
        if (!cancelled) {
                xen_arch_resume();
                xenbus_resume();
        else
        } else
                xenbus_suspend_cancel();

        device_resume();
        device_resume(PMSG_RESUME);

        /* Make sure timer events get retriggered on all CPUs */
        clock_was_set();

@ -27,13 +27,12 @@

/*
 * some size calculation constants
 */
#define DEV_TABLE_ENTRY_SIZE 256
#define DEV_TABLE_ENTRY_SIZE 32
#define ALIAS_TABLE_ENTRY_SIZE 2
#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))

/* helper macros */
#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
#define HIGH_U32(x) (LOW_U32((x) >> 32))

/* Length of the MMIO region for the AMD IOMMU */
#define MMIO_REGION_LENGTH 0x4000

@ -158,78 +157,170 @@

#define MAX_DOMAIN_ID 65536

/*
 * This structure contains generic data for IOMMU protection domains
 * independent of their use.
 */
struct protection_domain {
        spinlock_t lock;
        u16 id;
        int mode;
        u64 *pt_root;
        void *priv;
        spinlock_t lock;        /* mostly used to lock the page table*/
        u16 id;                 /* the domain id written to the device table */
        int mode;               /* paging mode (0-6 levels) */
        u64 *pt_root;           /* page table root pointer */
        void *priv;             /* private data */
};

/*
 * Data container for a dma_ops specific protection domain
 */
struct dma_ops_domain {
        struct list_head list;

        /* generic protection domain information */
        struct protection_domain domain;

        /* size of the aperture for the mappings */
        unsigned long aperture_size;

        /* address we start to search for free addresses */
        unsigned long next_bit;

        /* address allocation bitmap */
        unsigned long *bitmap;

        /*
         * Array of PTE pages for the aperture. In this array we save all the
         * leaf pages of the domain page table used for the aperture. This way
         * we don't need to walk the page table to find a specific PTE. We can
         * just calculate its address in constant time.
         */
        u64 **pte_pages;
};
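(Illustrative aside, not part of the diff: a hedged sketch of the constant-time lookup the pte_pages[] comment above describes. With every leaf page of the aperture cached in an array, the PTE slot for a DMA address follows from two index calculations instead of a page-table walk. The names and the 512-PTEs-per-page assumption are made up for the example, not taken from the driver.)

#include <stdint.h>

#define EX_PAGE_SHIFT    12   /* assume 4 KiB IOMMU pages */
#define EX_PTES_PER_PAGE 512  /* assume 4 KiB leaf page / 8-byte PTE */

static uint64_t *aperture_pte_slot(uint64_t **pte_pages, uint64_t dma_addr)
{
        uint64_t pfn = dma_addr >> EX_PAGE_SHIFT;

        /* pick the cached leaf page, then the entry within it */
        return &pte_pages[pfn / EX_PTES_PER_PAGE][pfn % EX_PTES_PER_PAGE];
}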

/*
 * Structure where we save information about one hardware AMD IOMMU in the
 * system.
 */
struct amd_iommu {
        struct list_head list;

        /* locks the accesses to the hardware */
        spinlock_t lock;

        /* device id of this IOMMU */
        u16 devid;
        /*
         * Capability pointer. There could be more than one IOMMU per PCI
         * device function if there are more than one AMD IOMMU capability
         * pointers.
         */
        u16 cap_ptr;

        /* physical address of MMIO space */
        u64 mmio_phys;
        /* virtual address of MMIO space */
        u8 *mmio_base;

        /* capabilities of that IOMMU read from ACPI */
        u32 cap;

        /* first device this IOMMU handles. read from PCI */
        u16 first_device;
        /* last device this IOMMU handles. read from PCI */
        u16 last_device;

        /* start of exclusion range of that IOMMU */
        u64 exclusion_start;
        /* length of exclusion range of that IOMMU */
        u64 exclusion_length;

        /* command buffer virtual address */
        u8 *cmd_buf;
        /* size of command buffer */
        u32 cmd_buf_size;

        /* if one, we need to send a completion wait command */
        int need_sync;

        /* default dma_ops domain for that IOMMU */
        struct dma_ops_domain *default_dom;
};

/*
 * List with all IOMMUs in the system. This list is not locked because it is
 * only written and read at driver initialization or suspend time
 */
extern struct list_head amd_iommu_list;

/*
 * Structure defining one entry in the device table
 */
struct dev_table_entry {
        u32 data[8];
};

/*
 * One entry for unity mappings parsed out of the ACPI table.
 */
struct unity_map_entry {
        struct list_head list;

        /* starting device id this entry is used for (including) */
        u16 devid_start;
        /* end device id this entry is used for (including) */
        u16 devid_end;

        /* start address to unity map (including) */
        u64 address_start;
        /* end address to unity map (including) */
        u64 address_end;

        /* required protection */
        int prot;
};

/*
 * List of all unity mappings. It is not locked because as runtime it is only
 * read. It is created at ACPI table parsing time.
 */
extern struct list_head amd_iommu_unity_map;

/* data structures for device handling */
/*
 * Data structures for device handling
 */

/*
 * Device table used by hardware. Read and write accesses by software are
 * locked with the amd_iommu_pd_table lock.
 */
extern struct dev_table_entry *amd_iommu_dev_table;

/*
 * Alias table to find requestor ids to device ids. Not locked because only
 * read on runtime.
 */
extern u16 *amd_iommu_alias_table;

/*
 * Reverse lookup table to find the IOMMU which translates a specific device.
 */
extern struct amd_iommu **amd_iommu_rlookup_table;

/* size of the dma_ops aperture as power of 2 */
extern unsigned amd_iommu_aperture_order;

/* largest PCI device id we expect translation requests for */
extern u16 amd_iommu_last_bdf;

/* data structures for protection domain handling */
extern struct protection_domain **amd_iommu_pd_table;

/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;

/* will be 1 if device isolation is enabled */
extern int amd_iommu_isolate;

/* takes a PCI device id and prints it out in a readable form */
static inline void print_devid(u16 devid, int nl)
{
        int bus = devid >> 8;

@ -241,4 +332,11 @@ static inline void print_devid(u16 devid, int nl)

                printk("\n");
}

/* takes bus and device/function and returns the device id
 * FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
{
        return (((u16)bus) << 8) | devfn;
}

#endif
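(Illustrative aside, not part of the diff: a small worked example of the bus/devfn packing that calc_devid() and print_devid() above deal in, the 16-bit PCI "BDF" id with the bus number in the high byte and device/function in the low byte. The helper name is made up for the example.)

#include <stdint.h>
#include <stdio.h>

static uint16_t example_devid(uint8_t bus, uint8_t devfn)
{
        return (uint16_t)(bus << 8) | devfn;
}

int main(void)
{
        /* device 0x18, function 0 on bus 0 -> devfn = (0x18 << 3) | 0 */
        uint16_t devid = example_devid(0x00, (0x18 << 3) | 0);

        printf("%02x:%02x.%x -> devid 0x%04x\n",
               devid >> 8, (devid >> 3) & 0x1f, devid & 0x7, devid);
        return 0;
}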
@ -3,6 +3,8 @@

#include <linux/pm.h>
#include <linux/delay.h>

#include <asm/alternative.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <asm/processor.h>

@ -10,7 +12,7 @@

#define ARCH_APICTIMER_STOPS_ON_C3 1

#define Dprintk(x...)
#define Dprintk printk

/*
 * Debugging macros

@ -35,7 +37,7 @@ extern void generic_apic_probe(void);

#ifdef CONFIG_X86_LOCAL_APIC

extern int apic_verbosity;
extern unsigned int apic_verbosity;
extern int local_apic_timer_c2_ok;

extern int ioapic_force;

@ -48,7 +50,6 @@ extern int disable_apic;

#include <asm/paravirt.h>
#else
#define apic_write native_apic_write
#define apic_write_atomic native_apic_write_atomic
#define apic_read native_apic_read
#define setup_boot_clock setup_boot_APIC_clock
#define setup_secondary_clock setup_secondary_APIC_clock

@ -58,12 +59,11 @@ extern int is_vsmp_box(void);

static inline void native_apic_write(unsigned long reg, u32 v)
{
        *((volatile u32 *)(APIC_BASE + reg)) = v;
}
        volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);

static inline void native_apic_write_atomic(unsigned long reg, u32 v)
{
        (void)xchg((u32 *)(APIC_BASE + reg), v);
        alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP,
                       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
                       ASM_OUTPUT2("0" (v), "m" (*addr)));
}

static inline u32 native_apic_read(unsigned long reg)

@ -75,16 +75,6 @@ extern void apic_wait_icr_idle(void);

extern u32 safe_apic_wait_icr_idle(void);
extern int get_physical_broadcast(void);

#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_READ_AROUND_WRITE 0
# define apic_read_around(x)
# define apic_write_around(x, y) apic_write((x), (y))
#else
# define FORCE_READ_AROUND_WRITE 1
# define apic_read_around(x) apic_read(x)
# define apic_write_around(x, y) apic_write_atomic((x), (y))
#endif

static inline void ack_APIC_irq(void)
{
        /*

@ -95,7 +85,7 @@ static inline void ack_APIC_irq(void)

         */

        /* Docs say use 0 for future compatibility */
        apic_write_around(APIC_EOI, 0);
        apic_write(APIC_EOI, 0);
}

extern int lapic_get_maxlvt(void);
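(Illustrative aside, not part of the diff: a userspace-style sketch of the two store flavours the X86_FEATURE_11AP alternative above patches between. A plain MOV is enough on correct local APICs; parts with the 11AP erratum want a read around every APIC write, which is what the removed FORCE_READ_AROUND_WRITE macros expressed and what the XCHG form provides implicitly. stdatomic stands in for the kernel's xchg() here, and the function names are made up.)

#include <stdatomic.h>
#include <stdint.h>

static void apic_reg_write_plain(volatile uint32_t *reg, uint32_t v)
{
        *reg = v;                      /* the "movl %0, %1" side of the alternative */
}

static void apic_reg_write_readaround(_Atomic uint32_t *reg, uint32_t v)
{
        (void)atomic_exchange(reg, v); /* the "xchgl %0, %1" side: read plus write */
}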
@ -21,6 +21,7 @@ extern void intr_init_hook(void);

extern void pre_intr_init_hook(void);
extern void pre_setup_arch_hook(void);
extern void trap_init_hook(void);
extern void pre_time_init_hook(void);
extern void time_init_hook(void);
extern void mca_nmi_hook(void);

@ -356,7 +356,7 @@ static inline unsigned long ffz(unsigned long word)

 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
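(Illustrative aside, not part of the diff: a minimal usage sketch of the rule the corrected comment states, i.e. callers must reject 0 before calling __fls(). The wrapper name is made up and the snippet assumes this header is included.)

/* Returns the index of the highest set bit, or -1 for an all-zero word. */
static inline int highest_set_bit(unsigned long word)
{
        return word ? (int)__fls(word) : -1;
}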
@ -104,7 +104,7 @@

        .endif
        .endm

        .macro LOAD_ARGS offset
        .macro LOAD_ARGS offset, skiprax=0
        movq \offset(%rsp), %r11
        movq \offset+8(%rsp), %r10
        movq \offset+16(%rsp), %r9

@ -113,7 +113,10 @@

        movq \offset+48(%rsp), %rdx
        movq \offset+56(%rsp), %rsi
        movq \offset+64(%rsp), %rdi
        .if \skiprax
        .else
        movq \offset+72(%rsp), %rax
        .endif
        .endm

#define REST_SKIP 6*8

@ -165,4 +168,3 @@

        .macro icebp
        .byte 0xf1
        .endm

@ -79,6 +79,7 @@

#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */
#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */

@ -14,7 +14,6 @@ extern dma_addr_t bad_dma_address;

extern int iommu_merge;
extern struct device fallback_dev;
extern int panic_on_overflow;
extern int forbid_dac;
extern int force_iommu;

struct dma_mapping_ops {