Merge branch 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xfs: eagerly remove vmap mappings to avoid upsetting Xen
  xen: add some debug output for failed multicalls
  xen: fix incorrect vcpu_register_vcpu_info hypercall argument
  xen: ask the hypervisor how much space it needs reserved
  xen: lock pte pages while pinning/unpinning
  xen: deal with stale cr3 values when unpinning pagetables
  xen: add batch completion callbacks
  xen: yield to IPI target if necessary
  Clean up duplicate includes in arch/i386/xen/
  remove dead code in pgtable_cache_init
  paravirt: clean up lazy mode handling
  paravirt: refactor struct paravirt_ops into smaller pv_*_ops
This commit is contained in:
commit
fb9fc39517
|
@ -369,7 +369,7 @@ void apply_paravirt(struct paravirt_patch_site *start,
|
||||||
BUG_ON(p->len > MAX_PATCH_LEN);
|
BUG_ON(p->len > MAX_PATCH_LEN);
|
||||||
/* prep the buffer with the original instructions */
|
/* prep the buffer with the original instructions */
|
||||||
memcpy(insnbuf, p->instr, p->len);
|
memcpy(insnbuf, p->instr, p->len);
|
||||||
used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
|
used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
|
||||||
(unsigned long)p->instr, p->len);
|
(unsigned long)p->instr, p->len);
|
||||||
|
|
||||||
BUG_ON(used > p->len);
|
BUG_ON(used > p->len);
|
||||||
|
|
|
@ -116,12 +116,14 @@ void foo(void)
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
BLANK();
|
BLANK();
|
||||||
OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
|
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
||||||
OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
|
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
|
||||||
OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
|
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
|
||||||
OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
|
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
|
||||||
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
|
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
|
||||||
OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
|
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
|
||||||
|
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
|
||||||
|
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_XEN
|
#ifdef CONFIG_XEN
|
||||||
|
|
|
@ -437,7 +437,7 @@ ldt_ss:
|
||||||
* is still available to implement the setting of the high
|
* is still available to implement the setting of the high
|
||||||
* 16-bits in the INTERRUPT_RETURN paravirt-op.
|
* 16-bits in the INTERRUPT_RETURN paravirt-op.
|
||||||
*/
|
*/
|
||||||
cmpl $0, paravirt_ops+PARAVIRT_enabled
|
cmpl $0, pv_info+PARAVIRT_enabled
|
||||||
jne restore_nocheck
|
jne restore_nocheck
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -42,32 +42,33 @@ void _paravirt_nop(void)
|
||||||
static void __init default_banner(void)
|
static void __init default_banner(void)
|
||||||
{
|
{
|
||||||
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
||||||
paravirt_ops.name);
|
pv_info.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *memory_setup(void)
|
char *memory_setup(void)
|
||||||
{
|
{
|
||||||
return paravirt_ops.memory_setup();
|
return pv_init_ops.memory_setup();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Simple instruction patching code. */
|
/* Simple instruction patching code. */
|
||||||
#define DEF_NATIVE(name, code) \
|
#define DEF_NATIVE(ops, name, code) \
|
||||||
extern const char start_##name[], end_##name[]; \
|
extern const char start_##ops##_##name[], end_##ops##_##name[]; \
|
||||||
asm("start_" #name ": " code "; end_" #name ":")
|
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
|
||||||
|
|
||||||
DEF_NATIVE(irq_disable, "cli");
|
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
|
||||||
DEF_NATIVE(irq_enable, "sti");
|
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
|
||||||
DEF_NATIVE(restore_fl, "push %eax; popf");
|
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
|
||||||
DEF_NATIVE(save_fl, "pushf; pop %eax");
|
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
|
||||||
DEF_NATIVE(iret, "iret");
|
DEF_NATIVE(pv_cpu_ops, iret, "iret");
|
||||||
DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
|
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
|
||||||
DEF_NATIVE(read_cr2, "mov %cr2, %eax");
|
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
|
||||||
DEF_NATIVE(write_cr3, "mov %eax, %cr3");
|
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
|
||||||
DEF_NATIVE(read_cr3, "mov %cr3, %eax");
|
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
|
||||||
DEF_NATIVE(clts, "clts");
|
DEF_NATIVE(pv_cpu_ops, clts, "clts");
|
||||||
DEF_NATIVE(read_tsc, "rdtsc");
|
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
|
||||||
|
|
||||||
DEF_NATIVE(ud2a, "ud2a");
|
/* Undefined instruction for dealing with missing ops pointers. */
|
||||||
|
static const unsigned char ud2a[] = { 0x0f, 0x0b };
|
||||||
|
|
||||||
static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||||
unsigned long addr, unsigned len)
|
unsigned long addr, unsigned len)
|
||||||
|
@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||||
unsigned ret;
|
unsigned ret;
|
||||||
|
|
||||||
switch(type) {
|
switch(type) {
|
||||||
#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
|
#define SITE(ops, x) \
|
||||||
SITE(irq_disable);
|
case PARAVIRT_PATCH(ops.x): \
|
||||||
SITE(irq_enable);
|
start = start_##ops##_##x; \
|
||||||
SITE(restore_fl);
|
end = end_##ops##_##x; \
|
||||||
SITE(save_fl);
|
goto patch_site
|
||||||
SITE(iret);
|
|
||||||
SITE(irq_enable_sysexit);
|
SITE(pv_irq_ops, irq_disable);
|
||||||
SITE(read_cr2);
|
SITE(pv_irq_ops, irq_enable);
|
||||||
SITE(read_cr3);
|
SITE(pv_irq_ops, restore_fl);
|
||||||
SITE(write_cr3);
|
SITE(pv_irq_ops, save_fl);
|
||||||
SITE(clts);
|
SITE(pv_cpu_ops, iret);
|
||||||
SITE(read_tsc);
|
SITE(pv_cpu_ops, irq_enable_sysexit);
|
||||||
|
SITE(pv_mmu_ops, read_cr2);
|
||||||
|
SITE(pv_mmu_ops, read_cr3);
|
||||||
|
SITE(pv_mmu_ops, write_cr3);
|
||||||
|
SITE(pv_cpu_ops, clts);
|
||||||
|
SITE(pv_cpu_ops, read_tsc);
|
||||||
#undef SITE
|
#undef SITE
|
||||||
|
|
||||||
patch_site:
|
patch_site:
|
||||||
ret = paravirt_patch_insns(ibuf, len, start, end);
|
ret = paravirt_patch_insns(ibuf, len, start, end);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PARAVIRT_PATCH(make_pgd):
|
|
||||||
case PARAVIRT_PATCH(make_pte):
|
|
||||||
case PARAVIRT_PATCH(pgd_val):
|
|
||||||
case PARAVIRT_PATCH(pte_val):
|
|
||||||
#ifdef CONFIG_X86_PAE
|
|
||||||
case PARAVIRT_PATCH(make_pmd):
|
|
||||||
case PARAVIRT_PATCH(pmd_val):
|
|
||||||
#endif
|
|
||||||
/* These functions end up returning exactly what
|
|
||||||
they're passed, in the same registers. */
|
|
||||||
ret = paravirt_patch_nop();
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
|
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
|
||||||
break;
|
break;
|
||||||
|
@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
|
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
|
||||||
unsigned long addr, unsigned len)
|
unsigned long addr, unsigned len)
|
||||||
{
|
{
|
||||||
struct branch *b = insnbuf;
|
struct branch *b = insnbuf;
|
||||||
|
@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Neat trick to map patch type back to the call within the
|
||||||
|
* corresponding structure. */
|
||||||
|
static void *get_call_destination(u8 type)
|
||||||
|
{
|
||||||
|
struct paravirt_patch_template tmpl = {
|
||||||
|
.pv_init_ops = pv_init_ops,
|
||||||
|
.pv_time_ops = pv_time_ops,
|
||||||
|
.pv_cpu_ops = pv_cpu_ops,
|
||||||
|
.pv_irq_ops = pv_irq_ops,
|
||||||
|
.pv_apic_ops = pv_apic_ops,
|
||||||
|
.pv_mmu_ops = pv_mmu_ops,
|
||||||
|
};
|
||||||
|
return *((void **)&tmpl + type);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
|
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
|
||||||
unsigned long addr, unsigned len)
|
unsigned long addr, unsigned len)
|
||||||
{
|
{
|
||||||
void *opfunc = *((void **)&paravirt_ops + type);
|
void *opfunc = get_call_destination(type);
|
||||||
unsigned ret;
|
unsigned ret;
|
||||||
|
|
||||||
if (opfunc == NULL)
|
if (opfunc == NULL)
|
||||||
/* If there's no function, patch it with a ud2a (BUG) */
|
/* If there's no function, patch it with a ud2a (BUG) */
|
||||||
ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
|
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
|
||||||
else if (opfunc == paravirt_nop)
|
else if (opfunc == paravirt_nop)
|
||||||
/* If the operation is a nop, then nop the callsite */
|
/* If the operation is a nop, then nop the callsite */
|
||||||
ret = paravirt_patch_nop();
|
ret = paravirt_patch_nop();
|
||||||
else if (type == PARAVIRT_PATCH(iret) ||
|
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
|
||||||
type == PARAVIRT_PATCH(irq_enable_sysexit))
|
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
|
||||||
/* If operation requires a jmp, then jmp */
|
/* If operation requires a jmp, then jmp */
|
||||||
ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
|
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
|
||||||
else
|
else
|
||||||
/* Otherwise call the function; assume target could
|
/* Otherwise call the function; assume target could
|
||||||
clobber any caller-save reg */
|
clobber any caller-save reg */
|
||||||
|
@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
|
||||||
|
|
||||||
void init_IRQ(void)
|
void init_IRQ(void)
|
||||||
{
|
{
|
||||||
paravirt_ops.init_IRQ();
|
pv_irq_ops.init_IRQ();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void native_flush_tlb(void)
|
static void native_flush_tlb(void)
|
||||||
|
@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void);
|
||||||
|
|
||||||
static int __init print_banner(void)
|
static int __init print_banner(void)
|
||||||
{
|
{
|
||||||
paravirt_ops.banner();
|
pv_init_ops.banner();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
core_initcall(print_banner);
|
core_initcall(print_banner);
|
||||||
|
@ -273,47 +281,96 @@ int paravirt_disable_iospace(void)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct paravirt_ops paravirt_ops = {
|
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
|
||||||
|
|
||||||
|
static inline void enter_lazy(enum paravirt_lazy_mode mode)
|
||||||
|
{
|
||||||
|
BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
|
||||||
|
BUG_ON(preemptible());
|
||||||
|
|
||||||
|
x86_write_percpu(paravirt_lazy_mode, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
|
||||||
|
{
|
||||||
|
BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
|
||||||
|
BUG_ON(preemptible());
|
||||||
|
|
||||||
|
x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void paravirt_enter_lazy_mmu(void)
|
||||||
|
{
|
||||||
|
enter_lazy(PARAVIRT_LAZY_MMU);
|
||||||
|
}
|
||||||
|
|
||||||
|
void paravirt_leave_lazy_mmu(void)
|
||||||
|
{
|
||||||
|
paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
|
||||||
|
}
|
||||||
|
|
||||||
|
void paravirt_enter_lazy_cpu(void)
|
||||||
|
{
|
||||||
|
enter_lazy(PARAVIRT_LAZY_CPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
void paravirt_leave_lazy_cpu(void)
|
||||||
|
{
|
||||||
|
paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
|
||||||
|
{
|
||||||
|
return x86_read_percpu(paravirt_lazy_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pv_info pv_info = {
|
||||||
.name = "bare hardware",
|
.name = "bare hardware",
|
||||||
.paravirt_enabled = 0,
|
.paravirt_enabled = 0,
|
||||||
.kernel_rpl = 0,
|
.kernel_rpl = 0,
|
||||||
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
|
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pv_init_ops pv_init_ops = {
|
||||||
.patch = native_patch,
|
.patch = native_patch,
|
||||||
.banner = default_banner,
|
.banner = default_banner,
|
||||||
.arch_setup = paravirt_nop,
|
.arch_setup = paravirt_nop,
|
||||||
.memory_setup = machine_specific_memory_setup,
|
.memory_setup = machine_specific_memory_setup,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pv_time_ops pv_time_ops = {
|
||||||
|
.time_init = hpet_time_init,
|
||||||
.get_wallclock = native_get_wallclock,
|
.get_wallclock = native_get_wallclock,
|
||||||
.set_wallclock = native_set_wallclock,
|
.set_wallclock = native_set_wallclock,
|
||||||
.time_init = hpet_time_init,
|
.sched_clock = native_sched_clock,
|
||||||
.init_IRQ = native_init_IRQ,
|
.get_cpu_khz = native_calculate_cpu_khz,
|
||||||
|
};
|
||||||
|
|
||||||
.cpuid = native_cpuid,
|
struct pv_irq_ops pv_irq_ops = {
|
||||||
.get_debugreg = native_get_debugreg,
|
.init_IRQ = native_init_IRQ,
|
||||||
.set_debugreg = native_set_debugreg,
|
|
||||||
.clts = native_clts,
|
|
||||||
.read_cr0 = native_read_cr0,
|
|
||||||
.write_cr0 = native_write_cr0,
|
|
||||||
.read_cr2 = native_read_cr2,
|
|
||||||
.write_cr2 = native_write_cr2,
|
|
||||||
.read_cr3 = native_read_cr3,
|
|
||||||
.write_cr3 = native_write_cr3,
|
|
||||||
.read_cr4 = native_read_cr4,
|
|
||||||
.read_cr4_safe = native_read_cr4_safe,
|
|
||||||
.write_cr4 = native_write_cr4,
|
|
||||||
.save_fl = native_save_fl,
|
.save_fl = native_save_fl,
|
||||||
.restore_fl = native_restore_fl,
|
.restore_fl = native_restore_fl,
|
||||||
.irq_disable = native_irq_disable,
|
.irq_disable = native_irq_disable,
|
||||||
.irq_enable = native_irq_enable,
|
.irq_enable = native_irq_enable,
|
||||||
.safe_halt = native_safe_halt,
|
.safe_halt = native_safe_halt,
|
||||||
.halt = native_halt,
|
.halt = native_halt,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pv_cpu_ops pv_cpu_ops = {
|
||||||
|
.cpuid = native_cpuid,
|
||||||
|
.get_debugreg = native_get_debugreg,
|
||||||
|
.set_debugreg = native_set_debugreg,
|
||||||
|
.clts = native_clts,
|
||||||
|
.read_cr0 = native_read_cr0,
|
||||||
|
.write_cr0 = native_write_cr0,
|
||||||
|
.read_cr4 = native_read_cr4,
|
||||||
|
.read_cr4_safe = native_read_cr4_safe,
|
||||||
|
.write_cr4 = native_write_cr4,
|
||||||
.wbinvd = native_wbinvd,
|
.wbinvd = native_wbinvd,
|
||||||
.read_msr = native_read_msr_safe,
|
.read_msr = native_read_msr_safe,
|
||||||
.write_msr = native_write_msr_safe,
|
.write_msr = native_write_msr_safe,
|
||||||
.read_tsc = native_read_tsc,
|
.read_tsc = native_read_tsc,
|
||||||
.read_pmc = native_read_pmc,
|
.read_pmc = native_read_pmc,
|
||||||
.sched_clock = native_sched_clock,
|
|
||||||
.get_cpu_khz = native_calculate_cpu_khz,
|
|
||||||
.load_tr_desc = native_load_tr_desc,
|
.load_tr_desc = native_load_tr_desc,
|
||||||
.set_ldt = native_set_ldt,
|
.set_ldt = native_set_ldt,
|
||||||
.load_gdt = native_load_gdt,
|
.load_gdt = native_load_gdt,
|
||||||
|
@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = {
|
||||||
.write_idt_entry = write_dt_entry,
|
.write_idt_entry = write_dt_entry,
|
||||||
.load_esp0 = native_load_esp0,
|
.load_esp0 = native_load_esp0,
|
||||||
|
|
||||||
|
.irq_enable_sysexit = native_irq_enable_sysexit,
|
||||||
|
.iret = native_iret,
|
||||||
|
|
||||||
.set_iopl_mask = native_set_iopl_mask,
|
.set_iopl_mask = native_set_iopl_mask,
|
||||||
.io_delay = native_io_delay,
|
.io_delay = native_io_delay,
|
||||||
|
|
||||||
|
.lazy_mode = {
|
||||||
|
.enter = paravirt_nop,
|
||||||
|
.leave = paravirt_nop,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pv_apic_ops pv_apic_ops = {
|
||||||
#ifdef CONFIG_X86_LOCAL_APIC
|
#ifdef CONFIG_X86_LOCAL_APIC
|
||||||
.apic_write = native_apic_write,
|
.apic_write = native_apic_write,
|
||||||
.apic_write_atomic = native_apic_write_atomic,
|
.apic_write_atomic = native_apic_write_atomic,
|
||||||
|
@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = {
|
||||||
.setup_secondary_clock = setup_secondary_APIC_clock,
|
.setup_secondary_clock = setup_secondary_APIC_clock,
|
||||||
.startup_ipi_hook = paravirt_nop,
|
.startup_ipi_hook = paravirt_nop,
|
||||||
#endif
|
#endif
|
||||||
.set_lazy_mode = paravirt_nop,
|
};
|
||||||
|
|
||||||
|
struct pv_mmu_ops pv_mmu_ops = {
|
||||||
.pagetable_setup_start = native_pagetable_setup_start,
|
.pagetable_setup_start = native_pagetable_setup_start,
|
||||||
.pagetable_setup_done = native_pagetable_setup_done,
|
.pagetable_setup_done = native_pagetable_setup_done,
|
||||||
|
|
||||||
|
.read_cr2 = native_read_cr2,
|
||||||
|
.write_cr2 = native_write_cr2,
|
||||||
|
.read_cr3 = native_read_cr3,
|
||||||
|
.write_cr3 = native_write_cr3,
|
||||||
|
|
||||||
.flush_tlb_user = native_flush_tlb,
|
.flush_tlb_user = native_flush_tlb,
|
||||||
.flush_tlb_kernel = native_flush_tlb_global,
|
.flush_tlb_kernel = native_flush_tlb_global,
|
||||||
.flush_tlb_single = native_flush_tlb_single,
|
.flush_tlb_single = native_flush_tlb_single,
|
||||||
|
@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = {
|
||||||
.make_pte = native_make_pte,
|
.make_pte = native_make_pte,
|
||||||
.make_pgd = native_make_pgd,
|
.make_pgd = native_make_pgd,
|
||||||
|
|
||||||
.irq_enable_sysexit = native_irq_enable_sysexit,
|
|
||||||
.iret = native_iret,
|
|
||||||
|
|
||||||
.dup_mmap = paravirt_nop,
|
.dup_mmap = paravirt_nop,
|
||||||
.exit_mmap = paravirt_nop,
|
.exit_mmap = paravirt_nop,
|
||||||
.activate_mm = paravirt_nop,
|
.activate_mm = paravirt_nop,
|
||||||
|
|
||||||
|
.lazy_mode = {
|
||||||
|
.enter = paravirt_nop,
|
||||||
|
.leave = paravirt_nop,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
EXPORT_SYMBOL(paravirt_ops);
|
EXPORT_SYMBOL_GPL(pv_time_ops);
|
||||||
|
EXPORT_SYMBOL_GPL(pv_cpu_ops);
|
||||||
|
EXPORT_SYMBOL_GPL(pv_mmu_ops);
|
||||||
|
EXPORT_SYMBOL_GPL(pv_apic_ops);
|
||||||
|
EXPORT_SYMBOL_GPL(pv_info);
|
||||||
|
EXPORT_SYMBOL (pv_irq_ops);
|
||||||
|
|
|
@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
|
||||||
unsigned long eip, unsigned len)
|
unsigned long eip, unsigned len)
|
||||||
{
|
{
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case PARAVIRT_PATCH(irq_disable):
|
case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
|
||||||
return patch_internal(VMI_CALL_DisableInterrupts, len,
|
return patch_internal(VMI_CALL_DisableInterrupts, len,
|
||||||
insns, eip);
|
insns, eip);
|
||||||
case PARAVIRT_PATCH(irq_enable):
|
case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
|
||||||
return patch_internal(VMI_CALL_EnableInterrupts, len,
|
return patch_internal(VMI_CALL_EnableInterrupts, len,
|
||||||
insns, eip);
|
insns, eip);
|
||||||
case PARAVIRT_PATCH(restore_fl):
|
case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
|
||||||
return patch_internal(VMI_CALL_SetInterruptMask, len,
|
return patch_internal(VMI_CALL_SetInterruptMask, len,
|
||||||
insns, eip);
|
insns, eip);
|
||||||
case PARAVIRT_PATCH(save_fl):
|
case PARAVIRT_PATCH(pv_irq_ops.save_fl):
|
||||||
return patch_internal(VMI_CALL_GetInterruptMask, len,
|
return patch_internal(VMI_CALL_GetInterruptMask, len,
|
||||||
insns, eip);
|
insns, eip);
|
||||||
case PARAVIRT_PATCH(iret):
|
case PARAVIRT_PATCH(pv_cpu_ops.iret):
|
||||||
return patch_internal(VMI_CALL_IRET, len, insns, eip);
|
return patch_internal(VMI_CALL_IRET, len, insns, eip);
|
||||||
case PARAVIRT_PATCH(irq_enable_sysexit):
|
case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
|
||||||
return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
|
return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
|
static void vmi_enter_lazy_cpu(void)
|
||||||
{
|
{
|
||||||
static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
|
paravirt_enter_lazy_cpu();
|
||||||
|
vmi_ops.set_lazy_mode(2);
|
||||||
|
}
|
||||||
|
|
||||||
if (!vmi_ops.set_lazy_mode)
|
static void vmi_enter_lazy_mmu(void)
|
||||||
return;
|
{
|
||||||
|
paravirt_enter_lazy_mmu();
|
||||||
|
vmi_ops.set_lazy_mode(1);
|
||||||
|
}
|
||||||
|
|
||||||
/* Modes should never nest or overlap */
|
static void vmi_leave_lazy(void)
|
||||||
BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE ||
|
{
|
||||||
mode == PARAVIRT_LAZY_FLUSH));
|
paravirt_leave_lazy(paravirt_get_lazy_mode());
|
||||||
|
|
||||||
if (mode == PARAVIRT_LAZY_FLUSH) {
|
|
||||||
vmi_ops.set_lazy_mode(0);
|
vmi_ops.set_lazy_mode(0);
|
||||||
vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode));
|
|
||||||
} else {
|
|
||||||
vmi_ops.set_lazy_mode(mode);
|
|
||||||
__get_cpu_var(lazy_mode) = mode;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __init check_vmi_rom(struct vrom_header *rom)
|
static inline int __init check_vmi_rom(struct vrom_header *rom)
|
||||||
|
@ -690,9 +688,9 @@ do { \
|
||||||
reloc = call_vrom_long_func(vmi_rom, get_reloc, \
|
reloc = call_vrom_long_func(vmi_rom, get_reloc, \
|
||||||
VMI_CALL_##vmicall); \
|
VMI_CALL_##vmicall); \
|
||||||
if (rel->type == VMI_RELOCATION_CALL_REL) \
|
if (rel->type == VMI_RELOCATION_CALL_REL) \
|
||||||
paravirt_ops.opname = (void *)rel->eip; \
|
opname = (void *)rel->eip; \
|
||||||
else if (rel->type == VMI_RELOCATION_NOP) \
|
else if (rel->type == VMI_RELOCATION_NOP) \
|
||||||
paravirt_ops.opname = (void *)vmi_nop; \
|
opname = (void *)vmi_nop; \
|
||||||
else if (rel->type != VMI_RELOCATION_NONE) \
|
else if (rel->type != VMI_RELOCATION_NONE) \
|
||||||
printk(KERN_WARNING "VMI: Unknown relocation " \
|
printk(KERN_WARNING "VMI: Unknown relocation " \
|
||||||
"type %d for " #vmicall"\n",\
|
"type %d for " #vmicall"\n",\
|
||||||
|
@ -712,7 +710,7 @@ do { \
|
||||||
VMI_CALL_##vmicall); \
|
VMI_CALL_##vmicall); \
|
||||||
BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
|
BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
|
||||||
if (rel->type == VMI_RELOCATION_CALL_REL) { \
|
if (rel->type == VMI_RELOCATION_CALL_REL) { \
|
||||||
paravirt_ops.opname = wrapper; \
|
opname = wrapper; \
|
||||||
vmi_ops.cache = (void *)rel->eip; \
|
vmi_ops.cache = (void *)rel->eip; \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
@ -732,11 +730,11 @@ static inline int __init activate_vmi(void)
|
||||||
}
|
}
|
||||||
savesegment(cs, kernel_cs);
|
savesegment(cs, kernel_cs);
|
||||||
|
|
||||||
paravirt_ops.paravirt_enabled = 1;
|
pv_info.paravirt_enabled = 1;
|
||||||
paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
|
pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
|
||||||
|
pv_info.name = "vmi";
|
||||||
|
|
||||||
paravirt_ops.patch = vmi_patch;
|
pv_init_ops.patch = vmi_patch;
|
||||||
paravirt_ops.name = "vmi";
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Many of these operations are ABI compatible with VMI.
|
* Many of these operations are ABI compatible with VMI.
|
||||||
|
@ -754,26 +752,26 @@ static inline int __init activate_vmi(void)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* CPUID is special, so very special it gets wrapped like a present */
|
/* CPUID is special, so very special it gets wrapped like a present */
|
||||||
para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
|
para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
|
||||||
|
|
||||||
para_fill(clts, CLTS);
|
para_fill(pv_cpu_ops.clts, CLTS);
|
||||||
para_fill(get_debugreg, GetDR);
|
para_fill(pv_cpu_ops.get_debugreg, GetDR);
|
||||||
para_fill(set_debugreg, SetDR);
|
para_fill(pv_cpu_ops.set_debugreg, SetDR);
|
||||||
para_fill(read_cr0, GetCR0);
|
para_fill(pv_cpu_ops.read_cr0, GetCR0);
|
||||||
para_fill(read_cr2, GetCR2);
|
para_fill(pv_mmu_ops.read_cr2, GetCR2);
|
||||||
para_fill(read_cr3, GetCR3);
|
para_fill(pv_mmu_ops.read_cr3, GetCR3);
|
||||||
para_fill(read_cr4, GetCR4);
|
para_fill(pv_cpu_ops.read_cr4, GetCR4);
|
||||||
para_fill(write_cr0, SetCR0);
|
para_fill(pv_cpu_ops.write_cr0, SetCR0);
|
||||||
para_fill(write_cr2, SetCR2);
|
para_fill(pv_mmu_ops.write_cr2, SetCR2);
|
||||||
para_fill(write_cr3, SetCR3);
|
para_fill(pv_mmu_ops.write_cr3, SetCR3);
|
||||||
para_fill(write_cr4, SetCR4);
|
para_fill(pv_cpu_ops.write_cr4, SetCR4);
|
||||||
para_fill(save_fl, GetInterruptMask);
|
para_fill(pv_irq_ops.save_fl, GetInterruptMask);
|
||||||
para_fill(restore_fl, SetInterruptMask);
|
para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
|
||||||
para_fill(irq_disable, DisableInterrupts);
|
para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
|
||||||
para_fill(irq_enable, EnableInterrupts);
|
para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
|
||||||
|
|
||||||
para_fill(wbinvd, WBINVD);
|
para_fill(pv_cpu_ops.wbinvd, WBINVD);
|
||||||
para_fill(read_tsc, RDTSC);
|
para_fill(pv_cpu_ops.read_tsc, RDTSC);
|
||||||
|
|
||||||
/* The following we emulate with trap and emulate for now */
|
/* The following we emulate with trap and emulate for now */
|
||||||
/* paravirt_ops.read_msr = vmi_rdmsr */
|
/* paravirt_ops.read_msr = vmi_rdmsr */
|
||||||
|
@ -781,29 +779,38 @@ static inline int __init activate_vmi(void)
|
||||||
/* paravirt_ops.rdpmc = vmi_rdpmc */
|
/* paravirt_ops.rdpmc = vmi_rdpmc */
|
||||||
|
|
||||||
/* TR interface doesn't pass TR value, wrap */
|
/* TR interface doesn't pass TR value, wrap */
|
||||||
para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
|
para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
|
||||||
|
|
||||||
/* LDT is special, too */
|
/* LDT is special, too */
|
||||||
para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
|
para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
|
||||||
|
|
||||||
para_fill(load_gdt, SetGDT);
|
para_fill(pv_cpu_ops.load_gdt, SetGDT);
|
||||||
para_fill(load_idt, SetIDT);
|
para_fill(pv_cpu_ops.load_idt, SetIDT);
|
||||||
para_fill(store_gdt, GetGDT);
|
para_fill(pv_cpu_ops.store_gdt, GetGDT);
|
||||||
para_fill(store_idt, GetIDT);
|
para_fill(pv_cpu_ops.store_idt, GetIDT);
|
||||||
para_fill(store_tr, GetTR);
|
para_fill(pv_cpu_ops.store_tr, GetTR);
|
||||||
paravirt_ops.load_tls = vmi_load_tls;
|
pv_cpu_ops.load_tls = vmi_load_tls;
|
||||||
para_fill(write_ldt_entry, WriteLDTEntry);
|
para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
|
||||||
para_fill(write_gdt_entry, WriteGDTEntry);
|
para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
|
||||||
para_fill(write_idt_entry, WriteIDTEntry);
|
para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
|
||||||
para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
|
para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
|
||||||
para_fill(set_iopl_mask, SetIOPLMask);
|
para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
|
||||||
para_fill(io_delay, IODelay);
|
para_fill(pv_cpu_ops.io_delay, IODelay);
|
||||||
para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
|
|
||||||
|
para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
|
||||||
|
set_lazy_mode, SetLazyMode);
|
||||||
|
para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
|
||||||
|
set_lazy_mode, SetLazyMode);
|
||||||
|
|
||||||
|
para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
|
||||||
|
set_lazy_mode, SetLazyMode);
|
||||||
|
para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
|
||||||
|
set_lazy_mode, SetLazyMode);
|
||||||
|
|
||||||
/* user and kernel flush are just handled with different flags to FlushTLB */
|
/* user and kernel flush are just handled with different flags to FlushTLB */
|
||||||
para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
|
para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
|
||||||
para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
|
para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
|
||||||
para_fill(flush_tlb_single, InvalPage);
|
para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Until a standard flag format can be agreed on, we need to
|
* Until a standard flag format can be agreed on, we need to
|
||||||
|
@ -819,41 +826,41 @@ static inline int __init activate_vmi(void)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (vmi_ops.set_pte) {
|
if (vmi_ops.set_pte) {
|
||||||
paravirt_ops.set_pte = vmi_set_pte;
|
pv_mmu_ops.set_pte = vmi_set_pte;
|
||||||
paravirt_ops.set_pte_at = vmi_set_pte_at;
|
pv_mmu_ops.set_pte_at = vmi_set_pte_at;
|
||||||
paravirt_ops.set_pmd = vmi_set_pmd;
|
pv_mmu_ops.set_pmd = vmi_set_pmd;
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
|
pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
|
||||||
paravirt_ops.set_pte_present = vmi_set_pte_present;
|
pv_mmu_ops.set_pte_present = vmi_set_pte_present;
|
||||||
paravirt_ops.set_pud = vmi_set_pud;
|
pv_mmu_ops.set_pud = vmi_set_pud;
|
||||||
paravirt_ops.pte_clear = vmi_pte_clear;
|
pv_mmu_ops.pte_clear = vmi_pte_clear;
|
||||||
paravirt_ops.pmd_clear = vmi_pmd_clear;
|
pv_mmu_ops.pmd_clear = vmi_pmd_clear;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vmi_ops.update_pte) {
|
if (vmi_ops.update_pte) {
|
||||||
paravirt_ops.pte_update = vmi_update_pte;
|
pv_mmu_ops.pte_update = vmi_update_pte;
|
||||||
paravirt_ops.pte_update_defer = vmi_update_pte_defer;
|
pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
|
||||||
}
|
}
|
||||||
|
|
||||||
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
|
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
|
||||||
if (vmi_ops.allocate_page) {
|
if (vmi_ops.allocate_page) {
|
||||||
paravirt_ops.alloc_pt = vmi_allocate_pt;
|
pv_mmu_ops.alloc_pt = vmi_allocate_pt;
|
||||||
paravirt_ops.alloc_pd = vmi_allocate_pd;
|
pv_mmu_ops.alloc_pd = vmi_allocate_pd;
|
||||||
paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
|
pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
|
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
|
||||||
if (vmi_ops.release_page) {
|
if (vmi_ops.release_page) {
|
||||||
paravirt_ops.release_pt = vmi_release_pt;
|
pv_mmu_ops.release_pt = vmi_release_pt;
|
||||||
paravirt_ops.release_pd = vmi_release_pd;
|
pv_mmu_ops.release_pd = vmi_release_pd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set linear is needed in all cases */
|
/* Set linear is needed in all cases */
|
||||||
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
|
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
|
||||||
#ifdef CONFIG_HIGHPTE
|
#ifdef CONFIG_HIGHPTE
|
||||||
if (vmi_ops.set_linear_mapping)
|
if (vmi_ops.set_linear_mapping)
|
||||||
paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
|
pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -863,17 +870,17 @@ static inline int __init activate_vmi(void)
|
||||||
* the backend. They are performance critical anyway, so requiring
|
* the backend. They are performance critical anyway, so requiring
|
||||||
* a patch is not a big problem.
|
* a patch is not a big problem.
|
||||||
*/
|
*/
|
||||||
paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
|
pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
|
||||||
paravirt_ops.iret = (void *)0xbadbab0;
|
pv_cpu_ops.iret = (void *)0xbadbab0;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
|
para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_LOCAL_APIC
|
#ifdef CONFIG_X86_LOCAL_APIC
|
||||||
para_fill(apic_read, APICRead);
|
para_fill(pv_apic_ops.apic_read, APICRead);
|
||||||
para_fill(apic_write, APICWrite);
|
para_fill(pv_apic_ops.apic_write, APICWrite);
|
||||||
para_fill(apic_write_atomic, APICWrite);
|
para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -891,15 +898,15 @@ static inline int __init activate_vmi(void)
|
||||||
vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
|
vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
|
||||||
vmi_timer_ops.cancel_alarm =
|
vmi_timer_ops.cancel_alarm =
|
||||||
vmi_get_function(VMI_CALL_CancelAlarm);
|
vmi_get_function(VMI_CALL_CancelAlarm);
|
||||||
paravirt_ops.time_init = vmi_time_init;
|
pv_time_ops.time_init = vmi_time_init;
|
||||||
paravirt_ops.get_wallclock = vmi_get_wallclock;
|
pv_time_ops.get_wallclock = vmi_get_wallclock;
|
||||||
paravirt_ops.set_wallclock = vmi_set_wallclock;
|
pv_time_ops.set_wallclock = vmi_set_wallclock;
|
||||||
#ifdef CONFIG_X86_LOCAL_APIC
|
#ifdef CONFIG_X86_LOCAL_APIC
|
||||||
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
|
pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
|
||||||
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
|
pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
|
||||||
#endif
|
#endif
|
||||||
paravirt_ops.sched_clock = vmi_sched_clock;
|
pv_time_ops.sched_clock = vmi_sched_clock;
|
||||||
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
|
pv_time_ops.get_cpu_khz = vmi_cpu_khz;
|
||||||
|
|
||||||
/* We have true wallclock functions; disable CMOS clock sync */
|
/* We have true wallclock functions; disable CMOS clock sync */
|
||||||
no_sync_cmos_clock = 1;
|
no_sync_cmos_clock = 1;
|
||||||
|
@ -908,7 +915,7 @@ static inline int __init activate_vmi(void)
|
||||||
disable_vmi_timer = 1;
|
disable_vmi_timer = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
para_fill(safe_halt, Halt);
|
para_fill(pv_irq_ops.safe_halt, Halt);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Alternative instruction rewriting doesn't happen soon enough
|
* Alternative instruction rewriting doesn't happen soon enough
|
||||||
|
|
|
@ -741,24 +741,12 @@ struct kmem_cache *pmd_cache;
|
||||||
|
|
||||||
void __init pgtable_cache_init(void)
|
void __init pgtable_cache_init(void)
|
||||||
{
|
{
|
||||||
size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
|
if (PTRS_PER_PMD > 1)
|
||||||
|
|
||||||
if (PTRS_PER_PMD > 1) {
|
|
||||||
pmd_cache = kmem_cache_create("pmd",
|
pmd_cache = kmem_cache_create("pmd",
|
||||||
PTRS_PER_PMD*sizeof(pmd_t),
|
PTRS_PER_PMD*sizeof(pmd_t),
|
||||||
PTRS_PER_PMD*sizeof(pmd_t),
|
PTRS_PER_PMD*sizeof(pmd_t),
|
||||||
SLAB_PANIC,
|
SLAB_PANIC,
|
||||||
pmd_ctor);
|
pmd_ctor);
|
||||||
if (!SHARED_KERNEL_PMD) {
|
|
||||||
/* If we're in PAE mode and have a non-shared
|
|
||||||
kernel pmd, then the pgd size must be a
|
|
||||||
page size. This is because the pgd_list
|
|
||||||
links through the page structure, so there
|
|
||||||
can only be one pgd per page for this to
|
|
||||||
work. */
|
|
||||||
pgd_size = PAGE_SIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/page-flags.h>
|
#include <linux/page-flags.h>
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
#include <linux/smp.h>
|
|
||||||
|
|
||||||
#include <xen/interface/xen.h>
|
#include <xen/interface/xen.h>
|
||||||
#include <xen/interface/physdev.h>
|
#include <xen/interface/physdev.h>
|
||||||
|
@ -52,11 +51,25 @@
|
||||||
|
|
||||||
EXPORT_SYMBOL_GPL(hypercall_page);
|
EXPORT_SYMBOL_GPL(hypercall_page);
|
||||||
|
|
||||||
DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
|
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
||||||
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
||||||
DEFINE_PER_CPU(unsigned long, xen_cr3);
|
|
||||||
|
/*
|
||||||
|
* Note about cr3 (pagetable base) values:
|
||||||
|
*
|
||||||
|
* xen_cr3 contains the current logical cr3 value; it contains the
|
||||||
|
* last set cr3. This may not be the current effective cr3, because
|
||||||
|
* its update may be being lazily deferred. However, a vcpu looking
|
||||||
|
* at its own cr3 can use this value knowing that everything will
|
||||||
|
* be self-consistent.
|
||||||
|
*
|
||||||
|
* xen_current_cr3 contains the actual vcpu cr3; it is set once the
|
||||||
|
* hypercall to set the vcpu cr3 is complete (so it may be a little
|
||||||
|
* out of date, but it will never be set early). If one vcpu is
|
||||||
|
* looking at another vcpu's cr3 value, it should use this variable.
|
||||||
|
*/
|
||||||
|
DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
|
||||||
|
DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
|
||||||
|
|
||||||
struct start_info *xen_start_info;
|
struct start_info *xen_start_info;
|
||||||
EXPORT_SYMBOL_GPL(xen_start_info);
|
EXPORT_SYMBOL_GPL(xen_start_info);
|
||||||
|
@ -100,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu)
|
||||||
info.mfn = virt_to_mfn(vcpup);
|
info.mfn = virt_to_mfn(vcpup);
|
||||||
info.offset = offset_in_page(vcpup);
|
info.offset = offset_in_page(vcpup);
|
||||||
|
|
||||||
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
|
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
|
||||||
cpu, vcpup, info.mfn, info.offset);
|
cpu, vcpup, info.mfn, info.offset);
|
||||||
|
|
||||||
/* Check to see if the hypervisor will put the vcpu_info
|
/* Check to see if the hypervisor will put the vcpu_info
|
||||||
|
@ -124,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu)
|
||||||
static void __init xen_banner(void)
|
static void __init xen_banner(void)
|
||||||
{
|
{
|
||||||
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
||||||
paravirt_ops.name);
|
pv_info.name);
|
||||||
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
|
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -249,29 +262,10 @@ static void xen_halt(void)
|
||||||
xen_safe_halt();
|
xen_safe_halt();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
|
static void xen_leave_lazy(void)
|
||||||
{
|
{
|
||||||
BUG_ON(preemptible());
|
paravirt_leave_lazy(paravirt_get_lazy_mode());
|
||||||
|
|
||||||
switch (mode) {
|
|
||||||
case PARAVIRT_LAZY_NONE:
|
|
||||||
BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case PARAVIRT_LAZY_MMU:
|
|
||||||
case PARAVIRT_LAZY_CPU:
|
|
||||||
BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case PARAVIRT_LAZY_FLUSH:
|
|
||||||
/* flush if necessary, but don't change state */
|
|
||||||
if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
|
|
||||||
xen_mc_flush();
|
xen_mc_flush();
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
xen_mc_flush();
|
|
||||||
x86_write_percpu(xen_lazy_mode, mode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long xen_store_tr(void)
|
static unsigned long xen_store_tr(void)
|
||||||
|
@ -358,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||||
* loaded properly. This will go away as soon as Xen has been
|
* loaded properly. This will go away as soon as Xen has been
|
||||||
* modified to not save/restore %gs for normal hypercalls.
|
* modified to not save/restore %gs for normal hypercalls.
|
||||||
*/
|
*/
|
||||||
if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
|
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
|
||||||
loadsegment(gs, 0);
|
loadsegment(gs, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -632,32 +626,36 @@ static unsigned long xen_read_cr3(void)
|
||||||
return x86_read_percpu(xen_cr3);
|
return x86_read_percpu(xen_cr3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void set_current_cr3(void *v)
|
||||||
|
{
|
||||||
|
x86_write_percpu(xen_current_cr3, (unsigned long)v);
|
||||||
|
}
|
||||||
|
|
||||||
static void xen_write_cr3(unsigned long cr3)
|
static void xen_write_cr3(unsigned long cr3)
|
||||||
{
|
{
|
||||||
|
struct mmuext_op *op;
|
||||||
|
struct multicall_space mcs;
|
||||||
|
unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
||||||
|
|
||||||
BUG_ON(preemptible());
|
BUG_ON(preemptible());
|
||||||
|
|
||||||
if (cr3 == x86_read_percpu(xen_cr3)) {
|
mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
|
||||||
/* just a simple tlb flush */
|
|
||||||
xen_flush_tlb();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/* Update while interrupts are disabled, so it's atomic with
|
||||||
|
respect to ipis */
|
||||||
x86_write_percpu(xen_cr3, cr3);
|
x86_write_percpu(xen_cr3, cr3);
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
struct mmuext_op *op;
|
|
||||||
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
|
|
||||||
unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
|
||||||
|
|
||||||
op = mcs.args;
|
op = mcs.args;
|
||||||
op->cmd = MMUEXT_NEW_BASEPTR;
|
op->cmd = MMUEXT_NEW_BASEPTR;
|
||||||
op->arg1.mfn = mfn;
|
op->arg1.mfn = mfn;
|
||||||
|
|
||||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||||
|
|
||||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
/* Update xen_current_cr3 once the batch has actually
|
||||||
}
|
been submitted. */
|
||||||
|
xen_mc_callback(set_current_cr3, (void *)cr3);
|
||||||
|
|
||||||
|
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Early in boot, while setting up the initial pagetable, assume
|
/* Early in boot, while setting up the initial pagetable, assume
|
||||||
|
@ -668,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
|
||||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
|
||||||
|
{
|
||||||
|
struct mmuext_op op;
|
||||||
|
op.cmd = level;
|
||||||
|
op.arg1.mfn = pfn_to_mfn(pfn);
|
||||||
|
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
/* This needs to make sure the new pte page is pinned iff its being
|
/* This needs to make sure the new pte page is pinned iff its being
|
||||||
attached to a pinned pagetable. */
|
attached to a pinned pagetable. */
|
||||||
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
|
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
|
||||||
|
@ -677,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
|
||||||
if (PagePinned(virt_to_page(mm->pgd))) {
|
if (PagePinned(virt_to_page(mm->pgd))) {
|
||||||
SetPagePinned(page);
|
SetPagePinned(page);
|
||||||
|
|
||||||
if (!PageHighMem(page))
|
if (!PageHighMem(page)) {
|
||||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||||
else
|
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
|
||||||
|
} else
|
||||||
/* make sure there are no stray mappings of
|
/* make sure there are no stray mappings of
|
||||||
this page */
|
this page */
|
||||||
kmap_flush_unused();
|
kmap_flush_unused();
|
||||||
|
@ -692,9 +700,11 @@ static void xen_release_pt(u32 pfn)
|
||||||
struct page *page = pfn_to_page(pfn);
|
struct page *page = pfn_to_page(pfn);
|
||||||
|
|
||||||
if (PagePinned(page)) {
|
if (PagePinned(page)) {
|
||||||
if (!PageHighMem(page))
|
if (!PageHighMem(page)) {
|
||||||
|
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
|
||||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_HIGHPTE
|
#ifdef CONFIG_HIGHPTE
|
||||||
|
@ -738,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
|
||||||
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
|
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
|
||||||
|
|
||||||
/* special set_pte for pagetable initialization */
|
/* special set_pte for pagetable initialization */
|
||||||
paravirt_ops.set_pte = xen_set_pte_init;
|
pv_mmu_ops.set_pte = xen_set_pte_init;
|
||||||
|
|
||||||
init_mm.pgd = base;
|
init_mm.pgd = base;
|
||||||
/*
|
/*
|
||||||
|
@ -785,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||||
{
|
{
|
||||||
/* This will work as long as patching hasn't happened yet
|
/* This will work as long as patching hasn't happened yet
|
||||||
(which it hasn't) */
|
(which it hasn't) */
|
||||||
paravirt_ops.alloc_pt = xen_alloc_pt;
|
pv_mmu_ops.alloc_pt = xen_alloc_pt;
|
||||||
paravirt_ops.set_pte = xen_set_pte;
|
pv_mmu_ops.set_pte = xen_set_pte;
|
||||||
|
|
||||||
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
||||||
/*
|
/*
|
||||||
|
@ -808,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||||
/* Actually pin the pagetable down, but we can't set PG_pinned
|
/* Actually pin the pagetable down, but we can't set PG_pinned
|
||||||
yet because the page structures don't exist yet. */
|
yet because the page structures don't exist yet. */
|
||||||
{
|
{
|
||||||
struct mmuext_op op;
|
unsigned level;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
op.cmd = MMUEXT_PIN_L3_TABLE;
|
level = MMUEXT_PIN_L3_TABLE;
|
||||||
#else
|
#else
|
||||||
op.cmd = MMUEXT_PIN_L3_TABLE;
|
level = MMUEXT_PIN_L2_TABLE;
|
||||||
#endif
|
#endif
|
||||||
op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
|
|
||||||
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
|
||||||
BUG();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -833,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void)
|
||||||
if (have_vcpu_info_placement) {
|
if (have_vcpu_info_placement) {
|
||||||
printk(KERN_INFO "Xen: using vcpu_info placement\n");
|
printk(KERN_INFO "Xen: using vcpu_info placement\n");
|
||||||
|
|
||||||
paravirt_ops.save_fl = xen_save_fl_direct;
|
pv_irq_ops.save_fl = xen_save_fl_direct;
|
||||||
paravirt_ops.restore_fl = xen_restore_fl_direct;
|
pv_irq_ops.restore_fl = xen_restore_fl_direct;
|
||||||
paravirt_ops.irq_disable = xen_irq_disable_direct;
|
pv_irq_ops.irq_disable = xen_irq_disable_direct;
|
||||||
paravirt_ops.irq_enable = xen_irq_enable_direct;
|
pv_irq_ops.irq_enable = xen_irq_enable_direct;
|
||||||
paravirt_ops.read_cr2 = xen_read_cr2_direct;
|
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
|
||||||
paravirt_ops.iret = xen_iret_direct;
|
pv_cpu_ops.iret = xen_iret_direct;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -850,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||||
|
|
||||||
start = end = reloc = NULL;
|
start = end = reloc = NULL;
|
||||||
|
|
||||||
#define SITE(x) \
|
#define SITE(op, x) \
|
||||||
case PARAVIRT_PATCH(x): \
|
case PARAVIRT_PATCH(op.x): \
|
||||||
if (have_vcpu_info_placement) { \
|
if (have_vcpu_info_placement) { \
|
||||||
start = (char *)xen_##x##_direct; \
|
start = (char *)xen_##x##_direct; \
|
||||||
end = xen_##x##_direct_end; \
|
end = xen_##x##_direct_end; \
|
||||||
|
@ -860,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||||
goto patch_site
|
goto patch_site
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
SITE(irq_enable);
|
SITE(pv_irq_ops, irq_enable);
|
||||||
SITE(irq_disable);
|
SITE(pv_irq_ops, irq_disable);
|
||||||
SITE(save_fl);
|
SITE(pv_irq_ops, save_fl);
|
||||||
SITE(restore_fl);
|
SITE(pv_irq_ops, restore_fl);
|
||||||
#undef SITE
|
#undef SITE
|
||||||
|
|
||||||
patch_site:
|
patch_site:
|
||||||
|
@ -895,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
static const struct pv_info xen_info __initdata = {
|
||||||
.paravirt_enabled = 1,
|
.paravirt_enabled = 1,
|
||||||
.shared_kernel_pmd = 0,
|
.shared_kernel_pmd = 0,
|
||||||
|
|
||||||
.name = "Xen",
|
.name = "Xen",
|
||||||
.banner = xen_banner,
|
};
|
||||||
|
|
||||||
|
static const struct pv_init_ops xen_init_ops __initdata = {
|
||||||
.patch = xen_patch,
|
.patch = xen_patch,
|
||||||
|
|
||||||
|
.banner = xen_banner,
|
||||||
.memory_setup = xen_memory_setup,
|
.memory_setup = xen_memory_setup,
|
||||||
.arch_setup = xen_arch_setup,
|
.arch_setup = xen_arch_setup,
|
||||||
.init_IRQ = xen_init_IRQ,
|
|
||||||
.post_allocator_init = xen_mark_init_mm_pinned,
|
.post_allocator_init = xen_mark_init_mm_pinned,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct pv_time_ops xen_time_ops __initdata = {
|
||||||
.time_init = xen_time_init,
|
.time_init = xen_time_init,
|
||||||
|
|
||||||
.set_wallclock = xen_set_wallclock,
|
.set_wallclock = xen_set_wallclock,
|
||||||
.get_wallclock = xen_get_wallclock,
|
.get_wallclock = xen_get_wallclock,
|
||||||
.get_cpu_khz = xen_cpu_khz,
|
.get_cpu_khz = xen_cpu_khz,
|
||||||
.sched_clock = xen_sched_clock,
|
.sched_clock = xen_sched_clock,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
|
||||||
.cpuid = xen_cpuid,
|
.cpuid = xen_cpuid,
|
||||||
|
|
||||||
.set_debugreg = xen_set_debugreg,
|
.set_debugreg = xen_set_debugreg,
|
||||||
|
@ -925,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||||
.read_cr0 = native_read_cr0,
|
.read_cr0 = native_read_cr0,
|
||||||
.write_cr0 = native_write_cr0,
|
.write_cr0 = native_write_cr0,
|
||||||
|
|
||||||
.read_cr2 = xen_read_cr2,
|
|
||||||
.write_cr2 = xen_write_cr2,
|
|
||||||
|
|
||||||
.read_cr3 = xen_read_cr3,
|
|
||||||
.write_cr3 = xen_write_cr3,
|
|
||||||
|
|
||||||
.read_cr4 = native_read_cr4,
|
.read_cr4 = native_read_cr4,
|
||||||
.read_cr4_safe = native_read_cr4_safe,
|
.read_cr4_safe = native_read_cr4_safe,
|
||||||
.write_cr4 = xen_write_cr4,
|
.write_cr4 = xen_write_cr4,
|
||||||
|
|
||||||
.save_fl = xen_save_fl,
|
|
||||||
.restore_fl = xen_restore_fl,
|
|
||||||
.irq_disable = xen_irq_disable,
|
|
||||||
.irq_enable = xen_irq_enable,
|
|
||||||
.safe_halt = xen_safe_halt,
|
|
||||||
.halt = xen_halt,
|
|
||||||
.wbinvd = native_wbinvd,
|
.wbinvd = native_wbinvd,
|
||||||
|
|
||||||
.read_msr = native_read_msr_safe,
|
.read_msr = native_read_msr_safe,
|
||||||
|
@ -969,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||||
.set_iopl_mask = xen_set_iopl_mask,
|
.set_iopl_mask = xen_set_iopl_mask,
|
||||||
.io_delay = xen_io_delay,
|
.io_delay = xen_io_delay,
|
||||||
|
|
||||||
|
.lazy_mode = {
|
||||||
|
.enter = paravirt_enter_lazy_cpu,
|
||||||
|
.leave = xen_leave_lazy,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct pv_irq_ops xen_irq_ops __initdata = {
|
||||||
|
.init_IRQ = xen_init_IRQ,
|
||||||
|
.save_fl = xen_save_fl,
|
||||||
|
.restore_fl = xen_restore_fl,
|
||||||
|
.irq_disable = xen_irq_disable,
|
||||||
|
.irq_enable = xen_irq_enable,
|
||||||
|
.safe_halt = xen_safe_halt,
|
||||||
|
.halt = xen_halt,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct pv_apic_ops xen_apic_ops __initdata = {
|
||||||
#ifdef CONFIG_X86_LOCAL_APIC
|
#ifdef CONFIG_X86_LOCAL_APIC
|
||||||
.apic_write = xen_apic_write,
|
.apic_write = xen_apic_write,
|
||||||
.apic_write_atomic = xen_apic_write,
|
.apic_write_atomic = xen_apic_write,
|
||||||
|
@ -977,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||||
.setup_secondary_clock = paravirt_nop,
|
.setup_secondary_clock = paravirt_nop,
|
||||||
.startup_ipi_hook = paravirt_nop,
|
.startup_ipi_hook = paravirt_nop,
|
||||||
#endif
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct pv_mmu_ops xen_mmu_ops __initdata = {
|
||||||
|
.pagetable_setup_start = xen_pagetable_setup_start,
|
||||||
|
.pagetable_setup_done = xen_pagetable_setup_done,
|
||||||
|
|
||||||
|
.read_cr2 = xen_read_cr2,
|
||||||
|
.write_cr2 = xen_write_cr2,
|
||||||
|
|
||||||
|
.read_cr3 = xen_read_cr3,
|
||||||
|
.write_cr3 = xen_write_cr3,
|
||||||
|
|
||||||
.flush_tlb_user = xen_flush_tlb,
|
.flush_tlb_user = xen_flush_tlb,
|
||||||
.flush_tlb_kernel = xen_flush_tlb,
|
.flush_tlb_kernel = xen_flush_tlb,
|
||||||
|
@ -986,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||||
.pte_update = paravirt_nop,
|
.pte_update = paravirt_nop,
|
||||||
.pte_update_defer = paravirt_nop,
|
.pte_update_defer = paravirt_nop,
|
||||||
|
|
||||||
.pagetable_setup_start = xen_pagetable_setup_start,
|
|
||||||
.pagetable_setup_done = xen_pagetable_setup_done,
|
|
||||||
|
|
||||||
.alloc_pt = xen_alloc_pt_init,
|
.alloc_pt = xen_alloc_pt_init,
|
||||||
.release_pt = xen_release_pt,
|
.release_pt = xen_release_pt,
|
||||||
.alloc_pd = paravirt_nop,
|
.alloc_pd = paravirt_nop,
|
||||||
|
@ -1024,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||||
.dup_mmap = xen_dup_mmap,
|
.dup_mmap = xen_dup_mmap,
|
||||||
.exit_mmap = xen_exit_mmap,
|
.exit_mmap = xen_exit_mmap,
|
||||||
|
|
||||||
.set_lazy_mode = xen_set_lazy_mode,
|
.lazy_mode = {
|
||||||
|
.enter = paravirt_enter_lazy_mmu,
|
||||||
|
.leave = xen_leave_lazy,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -1080,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static void __init xen_reserve_top(void)
|
||||||
|
{
|
||||||
|
unsigned long top = HYPERVISOR_VIRT_START;
|
||||||
|
struct xen_platform_parameters pp;
|
||||||
|
|
||||||
|
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
|
||||||
|
top = pp.virt_start;
|
||||||
|
|
||||||
|
reserve_top_address(-top + 2 * PAGE_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
/* First C function to be called on Xen boot */
|
/* First C function to be called on Xen boot */
|
||||||
asmlinkage void __init xen_start_kernel(void)
|
asmlinkage void __init xen_start_kernel(void)
|
||||||
{
|
{
|
||||||
|
@ -1091,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void)
|
||||||
BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
|
BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
|
||||||
|
|
||||||
/* Install Xen paravirt ops */
|
/* Install Xen paravirt ops */
|
||||||
paravirt_ops = xen_paravirt_ops;
|
pv_info = xen_info;
|
||||||
|
pv_init_ops = xen_init_ops;
|
||||||
|
pv_time_ops = xen_time_ops;
|
||||||
|
pv_cpu_ops = xen_cpu_ops;
|
||||||
|
pv_irq_ops = xen_irq_ops;
|
||||||
|
pv_apic_ops = xen_apic_ops;
|
||||||
|
pv_mmu_ops = xen_mmu_ops;
|
||||||
|
|
||||||
machine_ops = xen_machine_ops;
|
machine_ops = xen_machine_ops;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -1113,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void)
|
||||||
/* keep using Xen gdt for now; no urgent need to change it */
|
/* keep using Xen gdt for now; no urgent need to change it */
|
||||||
|
|
||||||
x86_write_percpu(xen_cr3, __pa(pgd));
|
x86_write_percpu(xen_cr3, __pa(pgd));
|
||||||
|
x86_write_percpu(xen_current_cr3, __pa(pgd));
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
/* Don't do the full vcpu_info placement stuff until we have a
|
/* Don't do the full vcpu_info placement stuff until we have a
|
||||||
|
@ -1124,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void)
|
||||||
xen_setup_vcpu_info_placement();
|
xen_setup_vcpu_info_placement();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
paravirt_ops.kernel_rpl = 1;
|
pv_info.kernel_rpl = 1;
|
||||||
if (xen_feature(XENFEAT_supervisor_mode_kernel))
|
if (xen_feature(XENFEAT_supervisor_mode_kernel))
|
||||||
paravirt_ops.kernel_rpl = 0;
|
pv_info.kernel_rpl = 0;
|
||||||
|
|
||||||
/* set the limit of our address space */
|
/* set the limit of our address space */
|
||||||
reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
|
xen_reserve_top();
|
||||||
|
|
||||||
/* set up basic CPUID stuff */
|
/* set up basic CPUID stuff */
|
||||||
cpu_detect(&new_cpu_data);
|
cpu_detect(&new_cpu_data);
|
||||||
|
|
|
@ -41,7 +41,6 @@
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
#include <linux/bug.h>
|
#include <linux/bug.h>
|
||||||
#include <linux/sched.h>
|
|
||||||
|
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
|
@ -155,7 +154,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||||
pte_t *ptep, pte_t pteval)
|
pte_t *ptep, pte_t pteval)
|
||||||
{
|
{
|
||||||
if (mm == current->mm || mm == &init_mm) {
|
if (mm == current->mm || mm == &init_mm) {
|
||||||
if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
|
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
|
||||||
struct multicall_space mcs;
|
struct multicall_space mcs;
|
||||||
mcs = xen_mc_entry(0);
|
mcs = xen_mc_entry(0);
|
||||||
|
|
||||||
|
@ -304,7 +303,12 @@ pgd_t xen_make_pgd(unsigned long pgd)
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_X86_PAE */
|
#endif /* CONFIG_X86_PAE */
|
||||||
|
|
||||||
|
enum pt_level {
|
||||||
|
PT_PGD,
|
||||||
|
PT_PUD,
|
||||||
|
PT_PMD,
|
||||||
|
PT_PTE
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
(Yet another) pagetable walker. This one is intended for pinning a
|
(Yet another) pagetable walker. This one is intended for pinning a
|
||||||
|
@ -316,7 +320,7 @@ pgd_t xen_make_pgd(unsigned long pgd)
|
||||||
FIXADDR_TOP. But the important bit is that we don't pin beyond
|
FIXADDR_TOP. But the important bit is that we don't pin beyond
|
||||||
there, because then we start getting into Xen's ptes.
|
there, because then we start getting into Xen's ptes.
|
||||||
*/
|
*/
|
||||||
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
|
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
|
||||||
unsigned long limit)
|
unsigned long limit)
|
||||||
{
|
{
|
||||||
pgd_t *pgd = pgd_base;
|
pgd_t *pgd = pgd_base;
|
||||||
|
@ -341,7 +345,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
|
||||||
pud = pud_offset(pgd, 0);
|
pud = pud_offset(pgd, 0);
|
||||||
|
|
||||||
if (PTRS_PER_PUD > 1) /* not folded */
|
if (PTRS_PER_PUD > 1) /* not folded */
|
||||||
flush |= (*func)(virt_to_page(pud), 0);
|
flush |= (*func)(virt_to_page(pud), PT_PUD);
|
||||||
|
|
||||||
for (; addr != pud_limit; pud++, addr = pud_next) {
|
for (; addr != pud_limit; pud++, addr = pud_next) {
|
||||||
pmd_t *pmd;
|
pmd_t *pmd;
|
||||||
|
@ -360,7 +364,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
|
||||||
pmd = pmd_offset(pud, 0);
|
pmd = pmd_offset(pud, 0);
|
||||||
|
|
||||||
if (PTRS_PER_PMD > 1) /* not folded */
|
if (PTRS_PER_PMD > 1) /* not folded */
|
||||||
flush |= (*func)(virt_to_page(pmd), 0);
|
flush |= (*func)(virt_to_page(pmd), PT_PMD);
|
||||||
|
|
||||||
for (; addr != pmd_limit; pmd++) {
|
for (; addr != pmd_limit; pmd++) {
|
||||||
addr += (PAGE_SIZE * PTRS_PER_PTE);
|
addr += (PAGE_SIZE * PTRS_PER_PTE);
|
||||||
|
@ -372,17 +376,47 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
|
||||||
if (pmd_none(*pmd))
|
if (pmd_none(*pmd))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
flush |= (*func)(pmd_page(*pmd), 0);
|
flush |= (*func)(pmd_page(*pmd), PT_PTE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH);
|
flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
|
||||||
|
|
||||||
return flush;
|
return flush;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pin_page(struct page *page, unsigned flags)
|
static spinlock_t *lock_pte(struct page *page)
|
||||||
|
{
|
||||||
|
spinlock_t *ptl = NULL;
|
||||||
|
|
||||||
|
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
|
||||||
|
ptl = __pte_lockptr(page);
|
||||||
|
spin_lock(ptl);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ptl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void do_unlock(void *v)
|
||||||
|
{
|
||||||
|
spinlock_t *ptl = v;
|
||||||
|
spin_unlock(ptl);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void xen_do_pin(unsigned level, unsigned long pfn)
|
||||||
|
{
|
||||||
|
struct mmuext_op *op;
|
||||||
|
struct multicall_space mcs;
|
||||||
|
|
||||||
|
mcs = __xen_mc_entry(sizeof(*op));
|
||||||
|
op = mcs.args;
|
||||||
|
op->cmd = level;
|
||||||
|
op->arg1.mfn = pfn_to_mfn(pfn);
|
||||||
|
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int pin_page(struct page *page, enum pt_level level)
|
||||||
{
|
{
|
||||||
unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
|
unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
|
||||||
int flush;
|
int flush;
|
||||||
|
@ -397,12 +431,26 @@ static int pin_page(struct page *page, unsigned flags)
|
||||||
void *pt = lowmem_page_address(page);
|
void *pt = lowmem_page_address(page);
|
||||||
unsigned long pfn = page_to_pfn(page);
|
unsigned long pfn = page_to_pfn(page);
|
||||||
struct multicall_space mcs = __xen_mc_entry(0);
|
struct multicall_space mcs = __xen_mc_entry(0);
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
flush = 0;
|
flush = 0;
|
||||||
|
|
||||||
|
ptl = NULL;
|
||||||
|
if (level == PT_PTE)
|
||||||
|
ptl = lock_pte(page);
|
||||||
|
|
||||||
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
||||||
pfn_pte(pfn, PAGE_KERNEL_RO),
|
pfn_pte(pfn, PAGE_KERNEL_RO),
|
||||||
flags);
|
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
|
||||||
|
|
||||||
|
if (level == PT_PTE)
|
||||||
|
xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
|
||||||
|
|
||||||
|
if (ptl) {
|
||||||
|
/* Queue a deferred unlock for when this batch
|
||||||
|
is completed. */
|
||||||
|
xen_mc_callback(do_unlock, ptl);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return flush;
|
return flush;
|
||||||
|
@ -413,8 +461,7 @@ static int pin_page(struct page *page, unsigned flags)
|
||||||
read-only, and can be pinned. */
|
read-only, and can be pinned. */
|
||||||
void xen_pgd_pin(pgd_t *pgd)
|
void xen_pgd_pin(pgd_t *pgd)
|
||||||
{
|
{
|
||||||
struct multicall_space mcs;
|
unsigned level;
|
||||||
struct mmuext_op *op;
|
|
||||||
|
|
||||||
xen_mc_batch();
|
xen_mc_batch();
|
||||||
|
|
||||||
|
@ -425,16 +472,13 @@ void xen_pgd_pin(pgd_t *pgd)
|
||||||
xen_mc_batch();
|
xen_mc_batch();
|
||||||
}
|
}
|
||||||
|
|
||||||
mcs = __xen_mc_entry(sizeof(*op));
|
|
||||||
op = mcs.args;
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
op->cmd = MMUEXT_PIN_L3_TABLE;
|
level = MMUEXT_PIN_L3_TABLE;
|
||||||
#else
|
#else
|
||||||
op->cmd = MMUEXT_PIN_L2_TABLE;
|
level = MMUEXT_PIN_L2_TABLE;
|
||||||
#endif
|
#endif
|
||||||
op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
|
|
||||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
xen_do_pin(level, PFN_DOWN(__pa(pgd)));
|
||||||
|
|
||||||
xen_mc_issue(0);
|
xen_mc_issue(0);
|
||||||
}
|
}
|
||||||
|
@ -442,7 +486,7 @@ void xen_pgd_pin(pgd_t *pgd)
|
||||||
/* The init_mm pagetable is really pinned as soon as its created, but
|
/* The init_mm pagetable is really pinned as soon as its created, but
|
||||||
that's before we have page structures to store the bits. So do all
|
that's before we have page structures to store the bits. So do all
|
||||||
the book-keeping now. */
|
the book-keeping now. */
|
||||||
static __init int mark_pinned(struct page *page, unsigned flags)
|
static __init int mark_pinned(struct page *page, enum pt_level level)
|
||||||
{
|
{
|
||||||
SetPagePinned(page);
|
SetPagePinned(page);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -453,18 +497,32 @@ void __init xen_mark_init_mm_pinned(void)
|
||||||
pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
|
pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int unpin_page(struct page *page, unsigned flags)
|
static int unpin_page(struct page *page, enum pt_level level)
|
||||||
{
|
{
|
||||||
unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
|
unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
|
||||||
|
|
||||||
if (pgfl && !PageHighMem(page)) {
|
if (pgfl && !PageHighMem(page)) {
|
||||||
void *pt = lowmem_page_address(page);
|
void *pt = lowmem_page_address(page);
|
||||||
unsigned long pfn = page_to_pfn(page);
|
unsigned long pfn = page_to_pfn(page);
|
||||||
struct multicall_space mcs = __xen_mc_entry(0);
|
spinlock_t *ptl = NULL;
|
||||||
|
struct multicall_space mcs;
|
||||||
|
|
||||||
|
if (level == PT_PTE) {
|
||||||
|
ptl = lock_pte(page);
|
||||||
|
|
||||||
|
xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
|
||||||
|
}
|
||||||
|
|
||||||
|
mcs = __xen_mc_entry(0);
|
||||||
|
|
||||||
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
||||||
pfn_pte(pfn, PAGE_KERNEL),
|
pfn_pte(pfn, PAGE_KERNEL),
|
||||||
flags);
|
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
|
||||||
|
|
||||||
|
if (ptl) {
|
||||||
|
/* unlock when batch completed */
|
||||||
|
xen_mc_callback(do_unlock, ptl);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0; /* never need to flush on unpin */
|
return 0; /* never need to flush on unpin */
|
||||||
|
@ -473,18 +531,9 @@ static int unpin_page(struct page *page, unsigned flags)
|
||||||
/* Release a pagetables pages back as normal RW */
|
/* Release a pagetables pages back as normal RW */
|
||||||
static void xen_pgd_unpin(pgd_t *pgd)
|
static void xen_pgd_unpin(pgd_t *pgd)
|
||||||
{
|
{
|
||||||
struct mmuext_op *op;
|
|
||||||
struct multicall_space mcs;
|
|
||||||
|
|
||||||
xen_mc_batch();
|
xen_mc_batch();
|
||||||
|
|
||||||
mcs = __xen_mc_entry(sizeof(*op));
|
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||||
|
|
||||||
op = mcs.args;
|
|
||||||
op->cmd = MMUEXT_UNPIN_TABLE;
|
|
||||||
op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
|
|
||||||
|
|
||||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
|
||||||
|
|
||||||
pgd_walk(pgd, unpin_page, TASK_SIZE);
|
pgd_walk(pgd, unpin_page, TASK_SIZE);
|
||||||
|
|
||||||
|
@ -515,20 +564,43 @@ static void drop_other_mm_ref(void *info)
|
||||||
|
|
||||||
if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
|
if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
|
||||||
leave_mm(smp_processor_id());
|
leave_mm(smp_processor_id());
|
||||||
|
|
||||||
|
/* If this cpu still has a stale cr3 reference, then make sure
|
||||||
|
it has been flushed. */
|
||||||
|
if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
|
||||||
|
load_cr3(swapper_pg_dir);
|
||||||
|
arch_flush_lazy_cpu_mode();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void drop_mm_ref(struct mm_struct *mm)
|
static void drop_mm_ref(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
cpumask_t mask;
|
||||||
|
unsigned cpu;
|
||||||
|
|
||||||
if (current->active_mm == mm) {
|
if (current->active_mm == mm) {
|
||||||
if (current->mm == mm)
|
if (current->mm == mm)
|
||||||
load_cr3(swapper_pg_dir);
|
load_cr3(swapper_pg_dir);
|
||||||
else
|
else
|
||||||
leave_mm(smp_processor_id());
|
leave_mm(smp_processor_id());
|
||||||
|
arch_flush_lazy_cpu_mode();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cpus_empty(mm->cpu_vm_mask))
|
/* Get the "official" set of cpus referring to our pagetable. */
|
||||||
xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
|
mask = mm->cpu_vm_mask;
|
||||||
mm, 1);
|
|
||||||
|
/* It's possible that a vcpu may have a stale reference to our
|
||||||
|
cr3, because its in lazy mode, and it hasn't yet flushed
|
||||||
|
its set of pending hypercalls yet. In this case, we can
|
||||||
|
look at its actual current cr3 value, and force it to flush
|
||||||
|
if needed. */
|
||||||
|
for_each_online_cpu(cpu) {
|
||||||
|
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
|
||||||
|
cpu_set(cpu, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!cpus_empty(mask))
|
||||||
|
xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static void drop_mm_ref(struct mm_struct *mm)
|
static void drop_mm_ref(struct mm_struct *mm)
|
||||||
|
@ -563,5 +635,6 @@ void xen_exit_mmap(struct mm_struct *mm)
|
||||||
/* pgd may not be pinned in the error exit path of execve */
|
/* pgd may not be pinned in the error exit path of execve */
|
||||||
if (PagePinned(virt_to_page(mm->pgd)))
|
if (PagePinned(virt_to_page(mm->pgd)))
|
||||||
xen_pgd_unpin(mm->pgd);
|
xen_pgd_unpin(mm->pgd);
|
||||||
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,13 +26,22 @@
|
||||||
|
|
||||||
#include "multicalls.h"
|
#include "multicalls.h"
|
||||||
|
|
||||||
|
#define MC_DEBUG 1
|
||||||
|
|
||||||
#define MC_BATCH 32
|
#define MC_BATCH 32
|
||||||
#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
|
#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
|
||||||
|
|
||||||
struct mc_buffer {
|
struct mc_buffer {
|
||||||
struct multicall_entry entries[MC_BATCH];
|
struct multicall_entry entries[MC_BATCH];
|
||||||
|
#if MC_DEBUG
|
||||||
|
struct multicall_entry debug[MC_BATCH];
|
||||||
|
#endif
|
||||||
u64 args[MC_ARGS];
|
u64 args[MC_ARGS];
|
||||||
unsigned mcidx, argidx;
|
struct callback {
|
||||||
|
void (*fn)(void *);
|
||||||
|
void *data;
|
||||||
|
} callbacks[MC_BATCH];
|
||||||
|
unsigned mcidx, argidx, cbidx;
|
||||||
};
|
};
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
|
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
|
||||||
|
@ -43,6 +52,7 @@ void xen_mc_flush(void)
|
||||||
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
|
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
int i;
|
||||||
|
|
||||||
BUG_ON(preemptible());
|
BUG_ON(preemptible());
|
||||||
|
|
||||||
|
@ -51,13 +61,31 @@ void xen_mc_flush(void)
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
|
|
||||||
if (b->mcidx) {
|
if (b->mcidx) {
|
||||||
int i;
|
#if MC_DEBUG
|
||||||
|
memcpy(b->debug, b->entries,
|
||||||
|
b->mcidx * sizeof(struct multicall_entry));
|
||||||
|
#endif
|
||||||
|
|
||||||
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
|
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
|
||||||
BUG();
|
BUG();
|
||||||
for (i = 0; i < b->mcidx; i++)
|
for (i = 0; i < b->mcidx; i++)
|
||||||
if (b->entries[i].result < 0)
|
if (b->entries[i].result < 0)
|
||||||
ret++;
|
ret++;
|
||||||
|
|
||||||
|
#if MC_DEBUG
|
||||||
|
if (ret) {
|
||||||
|
printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
|
||||||
|
ret, smp_processor_id());
|
||||||
|
for(i = 0; i < b->mcidx; i++) {
|
||||||
|
printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
|
||||||
|
i+1, b->mcidx,
|
||||||
|
b->debug[i].op,
|
||||||
|
b->debug[i].args[0],
|
||||||
|
b->entries[i].result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
b->mcidx = 0;
|
b->mcidx = 0;
|
||||||
b->argidx = 0;
|
b->argidx = 0;
|
||||||
} else
|
} else
|
||||||
|
@ -65,6 +93,13 @@ void xen_mc_flush(void)
|
||||||
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
|
|
||||||
|
for(i = 0; i < b->cbidx; i++) {
|
||||||
|
struct callback *cb = &b->callbacks[i];
|
||||||
|
|
||||||
|
(*cb->fn)(cb->data);
|
||||||
|
}
|
||||||
|
b->cbidx = 0;
|
||||||
|
|
||||||
BUG_ON(ret);
|
BUG_ON(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,3 +123,16 @@ struct multicall_space __xen_mc_entry(size_t args)
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void xen_mc_callback(void (*fn)(void *), void *data)
|
||||||
|
{
|
||||||
|
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
|
||||||
|
struct callback *cb;
|
||||||
|
|
||||||
|
if (b->cbidx == MC_BATCH)
|
||||||
|
xen_mc_flush();
|
||||||
|
|
||||||
|
cb = &b->callbacks[b->cbidx++];
|
||||||
|
cb->fn = fn;
|
||||||
|
cb->data = data;
|
||||||
|
}
|
||||||
|
|
|
@ -35,11 +35,14 @@ void xen_mc_flush(void);
|
||||||
/* Issue a multicall if we're not in a lazy mode */
|
/* Issue a multicall if we're not in a lazy mode */
|
||||||
static inline void xen_mc_issue(unsigned mode)
|
static inline void xen_mc_issue(unsigned mode)
|
||||||
{
|
{
|
||||||
if ((xen_get_lazy_mode() & mode) == 0)
|
if ((paravirt_get_lazy_mode() & mode) == 0)
|
||||||
xen_mc_flush();
|
xen_mc_flush();
|
||||||
|
|
||||||
/* restore flags saved in xen_mc_batch */
|
/* restore flags saved in xen_mc_batch */
|
||||||
local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
|
local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set up a callback to be called when the current batch is flushed */
|
||||||
|
void xen_mc_callback(void (*fn)(void *), void *data);
|
||||||
|
|
||||||
#endif /* _XEN_MULTICALLS_H */
|
#endif /* _XEN_MULTICALLS_H */
|
||||||
|
|
|
@ -370,7 +370,8 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
|
||||||
void *info, int wait)
|
void *info, int wait)
|
||||||
{
|
{
|
||||||
struct call_data_struct data;
|
struct call_data_struct data;
|
||||||
int cpus;
|
int cpus, cpu;
|
||||||
|
bool yield;
|
||||||
|
|
||||||
/* Holding any lock stops cpus from going down. */
|
/* Holding any lock stops cpus from going down. */
|
||||||
spin_lock(&call_lock);
|
spin_lock(&call_lock);
|
||||||
|
@ -399,8 +400,13 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
|
||||||
/* Send a message to other CPUs and wait for them to respond */
|
/* Send a message to other CPUs and wait for them to respond */
|
||||||
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
|
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
|
||||||
|
|
||||||
/* Make sure other vcpus get a chance to run.
|
/* Make sure other vcpus get a chance to run if they need to. */
|
||||||
XXX too severe? Maybe we should check the other CPU's states? */
|
yield = false;
|
||||||
|
for_each_cpu_mask(cpu, mask)
|
||||||
|
if (xen_vcpu_stolen(cpu))
|
||||||
|
yield = true;
|
||||||
|
|
||||||
|
if (yield)
|
||||||
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
|
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
|
||||||
|
|
||||||
/* Wait for response */
|
/* Wait for response */
|
||||||
|
|
|
@ -105,6 +105,12 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
|
||||||
} while (get64(&state->state_entry_time) != state_time);
|
} while (get64(&state->state_entry_time) != state_time);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* return true when a vcpu could run but has no real cpu to run on */
|
||||||
|
bool xen_vcpu_stolen(int vcpu)
|
||||||
|
{
|
||||||
|
return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
|
||||||
|
}
|
||||||
|
|
||||||
static void setup_runstate_info(int cpu)
|
static void setup_runstate_info(int cpu)
|
||||||
{
|
{
|
||||||
struct vcpu_register_runstate_memory_area area;
|
struct vcpu_register_runstate_memory_area area;
|
||||||
|
|
|
@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps);
|
||||||
|
|
||||||
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
||||||
DECLARE_PER_CPU(unsigned long, xen_cr3);
|
DECLARE_PER_CPU(unsigned long, xen_cr3);
|
||||||
|
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
|
||||||
|
|
||||||
extern struct start_info *xen_start_info;
|
extern struct start_info *xen_start_info;
|
||||||
extern struct shared_info *HYPERVISOR_shared_info;
|
extern struct shared_info *HYPERVISOR_shared_info;
|
||||||
|
@ -27,15 +28,10 @@ unsigned long xen_get_wallclock(void);
|
||||||
int xen_set_wallclock(unsigned long time);
|
int xen_set_wallclock(unsigned long time);
|
||||||
unsigned long long xen_sched_clock(void);
|
unsigned long long xen_sched_clock(void);
|
||||||
|
|
||||||
|
bool xen_vcpu_stolen(int vcpu);
|
||||||
|
|
||||||
void xen_mark_init_mm_pinned(void);
|
void xen_mark_init_mm_pinned(void);
|
||||||
|
|
||||||
DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
|
|
||||||
|
|
||||||
static inline unsigned xen_get_lazy_mode(void)
|
|
||||||
{
|
|
||||||
return x86_read_percpu(xen_lazy_mode);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __init xen_fill_possible_map(void);
|
void __init xen_fill_possible_map(void);
|
||||||
|
|
||||||
void __init xen_setup_vcpu_info_placement(void);
|
void __init xen_setup_vcpu_info_placement(void);
|
||||||
|
|
|
@ -115,7 +115,7 @@ static struct hv_ops lguest_cons = {
|
||||||
* (0), and the struct hv_ops containing the put_chars() function. */
|
* (0), and the struct hv_ops containing the put_chars() function. */
|
||||||
static int __init cons_init(void)
|
static int __init cons_init(void)
|
||||||
{
|
{
|
||||||
if (strcmp(paravirt_ops.name, "lguest") != 0)
|
if (strcmp(pv_info.name, "lguest") != 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return hvc_instantiate(0, 0, &lguest_cons);
|
return hvc_instantiate(0, 0, &lguest_cons);
|
||||||
|
|
|
@ -248,8 +248,8 @@ static void unmap_switcher(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*H:130 Our Guest is usually so well behaved; it never tries to do things it
|
/*H:130 Our Guest is usually so well behaved; it never tries to do things it
|
||||||
* isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite
|
* isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
|
||||||
* complete, because it doesn't contain replacements for the Intel I/O
|
* quite complete, because it doesn't contain replacements for the Intel I/O
|
||||||
* instructions. As a result, the Guest sometimes fumbles across one during
|
* instructions. As a result, the Guest sometimes fumbles across one during
|
||||||
* the boot process as it probes for various things which are usually attached
|
* the boot process as it probes for various things which are usually attached
|
||||||
* to a PC.
|
* to a PC.
|
||||||
|
@ -694,7 +694,7 @@ static int __init init(void)
|
||||||
|
|
||||||
/* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
|
/* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
|
||||||
if (paravirt_enabled()) {
|
if (paravirt_enabled()) {
|
||||||
printk("lguest is afraid of %s\n", paravirt_ops.name);
|
printk("lguest is afraid of %s\n", pv_info.name);
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
*
|
*
|
||||||
* So how does the kernel know it's a Guest? The Guest starts at a special
|
* So how does the kernel know it's a Guest? The Guest starts at a special
|
||||||
* entry point marked with a magic string, which sets up a few things then
|
* entry point marked with a magic string, which sets up a few things then
|
||||||
* calls here. We replace the native functions in "struct paravirt_ops"
|
* calls here. We replace the native functions various "paravirt" structures
|
||||||
* with our Guest versions, then boot like normal. :*/
|
* with our Guest versions, then boot like normal. :*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -97,29 +97,17 @@ static cycle_t clock_base;
|
||||||
* them as a batch when lazy_mode is eventually turned off. Because hypercalls
|
* them as a batch when lazy_mode is eventually turned off. Because hypercalls
|
||||||
* are reasonably expensive, batching them up makes sense. For example, a
|
* are reasonably expensive, batching them up makes sense. For example, a
|
||||||
* large mmap might update dozens of page table entries: that code calls
|
* large mmap might update dozens of page table entries: that code calls
|
||||||
* lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls
|
* paravirt_enter_lazy_mmu(), does the dozen updates, then calls
|
||||||
* lguest_lazy_mode(PARAVIRT_LAZY_NONE).
|
* lguest_leave_lazy_mode().
|
||||||
*
|
*
|
||||||
* So, when we're in lazy mode, we call async_hypercall() to store the call for
|
* So, when we're in lazy mode, we call async_hypercall() to store the call for
|
||||||
* future processing. When lazy mode is turned off we issue a hypercall to
|
* future processing. When lazy mode is turned off we issue a hypercall to
|
||||||
* flush the stored calls.
|
* flush the stored calls.
|
||||||
*
|
*/
|
||||||
* There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which
|
static void lguest_leave_lazy_mode(void)
|
||||||
* indicates we're to flush any outstanding calls immediately. This is used
|
|
||||||
* when an interrupt handler does a kmap_atomic(): the page table changes must
|
|
||||||
* happen immediately even if we're in the middle of a batch. Usually we're
|
|
||||||
* not, though, so there's nothing to do. */
|
|
||||||
static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
|
|
||||||
static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
|
|
||||||
{
|
{
|
||||||
if (mode == PARAVIRT_LAZY_FLUSH) {
|
paravirt_leave_lazy(paravirt_get_lazy_mode());
|
||||||
if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE))
|
|
||||||
hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
|
hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
|
||||||
} else {
|
|
||||||
lazy_mode = mode;
|
|
||||||
if (mode == PARAVIRT_LAZY_NONE)
|
|
||||||
hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lazy_hcall(unsigned long call,
|
static void lazy_hcall(unsigned long call,
|
||||||
|
@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call,
|
||||||
unsigned long arg2,
|
unsigned long arg2,
|
||||||
unsigned long arg3)
|
unsigned long arg3)
|
||||||
{
|
{
|
||||||
if (lazy_mode == PARAVIRT_LAZY_NONE)
|
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
|
||||||
hcall(call, arg1, arg2, arg3);
|
hcall(call, arg1, arg2, arg3);
|
||||||
else
|
else
|
||||||
async_hcall(call, arg1, arg2, arg3);
|
async_hcall(call, arg1, arg2, arg3);
|
||||||
|
@ -331,7 +319,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*G:038 That's enough excitement for now, back to ploughing through each of
|
/*G:038 That's enough excitement for now, back to ploughing through each of
|
||||||
* the paravirt_ops (we're about 1/3 of the way through).
|
* the different pv_ops structures (we're about 1/3 of the way through).
|
||||||
*
|
*
|
||||||
* This is the Local Descriptor Table, another weird Intel thingy. Linux only
|
* This is the Local Descriptor Table, another weird Intel thingy. Linux only
|
||||||
* uses this for some strange applications like Wine. We don't do anything
|
* uses this for some strange applications like Wine. We don't do anything
|
||||||
|
@ -558,7 +546,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
|
||||||
lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
|
lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unfortunately for Lguest, the paravirt_ops for page tables were based on
|
/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
|
||||||
* native page table operations. On native hardware you can set a new page
|
* native page table operations. On native hardware you can set a new page
|
||||||
* table entry whenever you want, but if you want to remove one you have to do
|
* table entry whenever you want, but if you want to remove one you have to do
|
||||||
* a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
|
* a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
|
||||||
|
@ -782,7 +770,7 @@ static void lguest_time_init(void)
|
||||||
clocksource_register(&lguest_clock);
|
clocksource_register(&lguest_clock);
|
||||||
|
|
||||||
/* Now we've set up our clock, we can use it as the scheduler clock */
|
/* Now we've set up our clock, we can use it as the scheduler clock */
|
||||||
paravirt_ops.sched_clock = lguest_sched_clock;
|
pv_time_ops.sched_clock = lguest_sched_clock;
|
||||||
|
|
||||||
/* We can't set cpumask in the initializer: damn C limitations! Set it
|
/* We can't set cpumask in the initializer: damn C limitations! Set it
|
||||||
* here and register our timer device. */
|
* here and register our timer device. */
|
||||||
|
@ -904,7 +892,7 @@ static __init char *lguest_memory_setup(void)
|
||||||
/*G:050
|
/*G:050
|
||||||
* Patching (Powerfully Placating Performance Pedants)
|
* Patching (Powerfully Placating Performance Pedants)
|
||||||
*
|
*
|
||||||
* We have already seen that "struct paravirt_ops" lets us replace simple
|
* We have already seen that pv_ops structures let us replace simple
|
||||||
* native instructions with calls to the appropriate back end all throughout
|
* native instructions with calls to the appropriate back end all throughout
|
||||||
* the kernel. This allows the same kernel to run as a Guest and as a native
|
* the kernel. This allows the same kernel to run as a Guest and as a native
|
||||||
* kernel, but it's slow because of all the indirect branches.
|
* kernel, but it's slow because of all the indirect branches.
|
||||||
|
@ -929,10 +917,10 @@ static const struct lguest_insns
|
||||||
{
|
{
|
||||||
const char *start, *end;
|
const char *start, *end;
|
||||||
} lguest_insns[] = {
|
} lguest_insns[] = {
|
||||||
[PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli },
|
[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
|
||||||
[PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti },
|
[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
|
||||||
[PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf },
|
[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
|
||||||
[PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf },
|
[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Now our patch routine is fairly simple (based on the native one in
|
/* Now our patch routine is fairly simple (based on the native one in
|
||||||
|
@ -959,9 +947,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
|
||||||
return insn_len;
|
return insn_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops
|
/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
|
||||||
* structure in the kernel provides a single point for (almost) every routine
|
* structures in the kernel provide points for (almost) every routine we have
|
||||||
* we have to override to avoid privileged instructions. */
|
* to override to avoid privileged instructions. */
|
||||||
__init void lguest_init(void *boot)
|
__init void lguest_init(void *boot)
|
||||||
{
|
{
|
||||||
/* Copy boot parameters first: the Launcher put the physical location
|
/* Copy boot parameters first: the Launcher put the physical location
|
||||||
|
@ -976,54 +964,70 @@ __init void lguest_init(void *boot)
|
||||||
|
|
||||||
/* We're under lguest, paravirt is enabled, and we're running at
|
/* We're under lguest, paravirt is enabled, and we're running at
|
||||||
* privilege level 1, not 0 as normal. */
|
* privilege level 1, not 0 as normal. */
|
||||||
paravirt_ops.name = "lguest";
|
pv_info.name = "lguest";
|
||||||
paravirt_ops.paravirt_enabled = 1;
|
pv_info.paravirt_enabled = 1;
|
||||||
paravirt_ops.kernel_rpl = 1;
|
pv_info.kernel_rpl = 1;
|
||||||
|
|
||||||
/* We set up all the lguest overrides for sensitive operations. These
|
/* We set up all the lguest overrides for sensitive operations. These
|
||||||
* are detailed with the operations themselves. */
|
* are detailed with the operations themselves. */
|
||||||
paravirt_ops.save_fl = save_fl;
|
|
||||||
paravirt_ops.restore_fl = restore_fl;
|
/* interrupt-related operations */
|
||||||
paravirt_ops.irq_disable = irq_disable;
|
pv_irq_ops.init_IRQ = lguest_init_IRQ;
|
||||||
paravirt_ops.irq_enable = irq_enable;
|
pv_irq_ops.save_fl = save_fl;
|
||||||
paravirt_ops.load_gdt = lguest_load_gdt;
|
pv_irq_ops.restore_fl = restore_fl;
|
||||||
paravirt_ops.memory_setup = lguest_memory_setup;
|
pv_irq_ops.irq_disable = irq_disable;
|
||||||
paravirt_ops.cpuid = lguest_cpuid;
|
pv_irq_ops.irq_enable = irq_enable;
|
||||||
paravirt_ops.write_cr3 = lguest_write_cr3;
|
pv_irq_ops.safe_halt = lguest_safe_halt;
|
||||||
paravirt_ops.flush_tlb_user = lguest_flush_tlb_user;
|
|
||||||
paravirt_ops.flush_tlb_single = lguest_flush_tlb_single;
|
/* init-time operations */
|
||||||
paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
|
pv_init_ops.memory_setup = lguest_memory_setup;
|
||||||
paravirt_ops.set_pte = lguest_set_pte;
|
pv_init_ops.patch = lguest_patch;
|
||||||
paravirt_ops.set_pte_at = lguest_set_pte_at;
|
|
||||||
paravirt_ops.set_pmd = lguest_set_pmd;
|
/* Intercepts of various cpu instructions */
|
||||||
|
pv_cpu_ops.load_gdt = lguest_load_gdt;
|
||||||
|
pv_cpu_ops.cpuid = lguest_cpuid;
|
||||||
|
pv_cpu_ops.load_idt = lguest_load_idt;
|
||||||
|
pv_cpu_ops.iret = lguest_iret;
|
||||||
|
pv_cpu_ops.load_esp0 = lguest_load_esp0;
|
||||||
|
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
|
||||||
|
pv_cpu_ops.set_ldt = lguest_set_ldt;
|
||||||
|
pv_cpu_ops.load_tls = lguest_load_tls;
|
||||||
|
pv_cpu_ops.set_debugreg = lguest_set_debugreg;
|
||||||
|
pv_cpu_ops.clts = lguest_clts;
|
||||||
|
pv_cpu_ops.read_cr0 = lguest_read_cr0;
|
||||||
|
pv_cpu_ops.write_cr0 = lguest_write_cr0;
|
||||||
|
pv_cpu_ops.read_cr4 = lguest_read_cr4;
|
||||||
|
pv_cpu_ops.write_cr4 = lguest_write_cr4;
|
||||||
|
pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
|
||||||
|
pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
|
||||||
|
pv_cpu_ops.wbinvd = lguest_wbinvd;
|
||||||
|
pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
|
||||||
|
pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
|
||||||
|
|
||||||
|
/* pagetable management */
|
||||||
|
pv_mmu_ops.write_cr3 = lguest_write_cr3;
|
||||||
|
pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
|
||||||
|
pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
|
||||||
|
pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
|
||||||
|
pv_mmu_ops.set_pte = lguest_set_pte;
|
||||||
|
pv_mmu_ops.set_pte_at = lguest_set_pte_at;
|
||||||
|
pv_mmu_ops.set_pmd = lguest_set_pmd;
|
||||||
|
pv_mmu_ops.read_cr2 = lguest_read_cr2;
|
||||||
|
pv_mmu_ops.read_cr3 = lguest_read_cr3;
|
||||||
|
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
|
||||||
|
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_LOCAL_APIC
|
#ifdef CONFIG_X86_LOCAL_APIC
|
||||||
paravirt_ops.apic_write = lguest_apic_write;
|
/* apic read/write intercepts */
|
||||||
paravirt_ops.apic_write_atomic = lguest_apic_write;
|
pv_apic_ops.apic_write = lguest_apic_write;
|
||||||
paravirt_ops.apic_read = lguest_apic_read;
|
pv_apic_ops.apic_write_atomic = lguest_apic_write;
|
||||||
|
pv_apic_ops.apic_read = lguest_apic_read;
|
||||||
#endif
|
#endif
|
||||||
paravirt_ops.load_idt = lguest_load_idt;
|
|
||||||
paravirt_ops.iret = lguest_iret;
|
/* time operations */
|
||||||
paravirt_ops.load_esp0 = lguest_load_esp0;
|
pv_time_ops.get_wallclock = lguest_get_wallclock;
|
||||||
paravirt_ops.load_tr_desc = lguest_load_tr_desc;
|
pv_time_ops.time_init = lguest_time_init;
|
||||||
paravirt_ops.set_ldt = lguest_set_ldt;
|
|
||||||
paravirt_ops.load_tls = lguest_load_tls;
|
|
||||||
paravirt_ops.set_debugreg = lguest_set_debugreg;
|
|
||||||
paravirt_ops.clts = lguest_clts;
|
|
||||||
paravirt_ops.read_cr0 = lguest_read_cr0;
|
|
||||||
paravirt_ops.write_cr0 = lguest_write_cr0;
|
|
||||||
paravirt_ops.init_IRQ = lguest_init_IRQ;
|
|
||||||
paravirt_ops.read_cr2 = lguest_read_cr2;
|
|
||||||
paravirt_ops.read_cr3 = lguest_read_cr3;
|
|
||||||
paravirt_ops.read_cr4 = lguest_read_cr4;
|
|
||||||
paravirt_ops.write_cr4 = lguest_write_cr4;
|
|
||||||
paravirt_ops.write_gdt_entry = lguest_write_gdt_entry;
|
|
||||||
paravirt_ops.write_idt_entry = lguest_write_idt_entry;
|
|
||||||
paravirt_ops.patch = lguest_patch;
|
|
||||||
paravirt_ops.safe_halt = lguest_safe_halt;
|
|
||||||
paravirt_ops.get_wallclock = lguest_get_wallclock;
|
|
||||||
paravirt_ops.time_init = lguest_time_init;
|
|
||||||
paravirt_ops.set_lazy_mode = lguest_lazy_mode;
|
|
||||||
paravirt_ops.wbinvd = lguest_wbinvd;
|
|
||||||
/* Now is a good time to look at the implementations of these functions
|
/* Now is a good time to look at the implementations of these functions
|
||||||
* before returning to the rest of lguest_init(). */
|
* before returning to the rest of lguest_init(). */
|
||||||
|
|
||||||
|
|
|
@ -201,7 +201,7 @@ static void scan_devices(void)
|
||||||
* "struct lguest_device_desc" array. */
|
* "struct lguest_device_desc" array. */
|
||||||
static int __init lguest_bus_init(void)
|
static int __init lguest_bus_init(void)
|
||||||
{
|
{
|
||||||
if (strcmp(paravirt_ops.name, "lguest") != 0)
|
if (strcmp(pv_info.name, "lguest") != 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Devices are in a single page above top of "normal" mem */
|
/* Devices are in a single page above top of "normal" mem */
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
||||||
#define _I386_PGTABLE_3LEVEL_DEFS_H
|
#define _I386_PGTABLE_3LEVEL_DEFS_H
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd)
|
#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd)
|
||||||
#else
|
#else
|
||||||
#define SHARED_KERNEL_PMD 1
|
#define SHARED_KERNEL_PMD 1
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -160,8 +160,9 @@ struct vcpu_set_singleshot_timer {
|
||||||
*/
|
*/
|
||||||
#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */
|
#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */
|
||||||
struct vcpu_register_vcpu_info {
|
struct vcpu_register_vcpu_info {
|
||||||
uint32_t mfn; /* mfn of page to place vcpu_info */
|
uint64_t mfn; /* mfn of page to place vcpu_info */
|
||||||
uint32_t offset; /* offset within page */
|
uint32_t offset; /* offset within page */
|
||||||
|
uint32_t rsvd; /* unused */
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* __XEN_PUBLIC_VCPU_H__ */
|
#endif /* __XEN_PUBLIC_VCPU_H__ */
|
||||||
|
|
|
@ -155,7 +155,6 @@ config SPLIT_PTLOCK_CPUS
|
||||||
int
|
int
|
||||||
default "4096" if ARM && !CPU_CACHE_VIPT
|
default "4096" if ARM && !CPU_CACHE_VIPT
|
||||||
default "4096" if PARISC && !PA20
|
default "4096" if PARISC && !PA20
|
||||||
default "4096" if XEN
|
|
||||||
default "4"
|
default "4"
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue