Merge branch 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  x86: KVM guest: make setup_secondary_clock definition dependent on local apic
  KVM: MMU: Allow more than PAGES_PER_HPAGE write protections per large page
  KVM: avoid fx_init() schedule in atomic
  KVM: Avoid spurious exceptions after setting registers
  KVM: PIT: support mode 4
  KVM: x86 emulator: disable writeback on lmsw
  KVM: ppc: deliver INTERRUPT_FP_UNAVAIL to the guest
  KVM: ppc: Handle guest idle by emulating MSR[WE] writes
  KVM: x86: task switch: fix wrong bit setting for the busy flag
  KVM: VMX: Enable EPT feature for KVM
  KVM: VMX: Prepare an identity page table for EPT in real mode
  KVM: Export necessary function for EPT
  KVM: MMU: Remove #ifdef CONFIG_X86_64 to support 4 level EPT
  KVM: MMU: Add EPT support
  KVM: Add kvm_x86_ops get_tdp_level()
  KVM: MMU: Move some definitions to a header file
  KVM: VMX: EPT Feature Detection
This commit is contained in:
commit 8dcf578284
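Most of the x86 work in this merge is the initial EPT (Extended Page Tables) support. As a quick orientation before the diff, here is a minimal, self-contained sketch of how the EPT pointer (EPTP) is composed from the root table address, the memory type, and the guest address width. It mirrors the construct_eptp() helper added below, but the make_eptp() name, the local EXAMPLE_PAGE_MASK, and the example address are illustrative only, not part of the patches.

#include <stdint.h>
#include <stdio.h>

/* Constants as added to the VMX header in this series. */
#define VMX_EPT_DEFAULT_MT      0x6ull  /* EPT paging-structure memory type: write-back */
#define VMX_EPT_DEFAULT_GAW     3       /* guest address width field: 3 means 4-level tables */
#define VMX_EPT_GAW_EPTP_SHIFT  3
#define EXAMPLE_PAGE_MASK       (~0xfffull)     /* stand-in for the kernel's PAGE_MASK */

/* Illustrative equivalent of construct_eptp() from the VMX hunks below. */
static uint64_t make_eptp(uint64_t root_hpa)
{
        return VMX_EPT_DEFAULT_MT |
               ((uint64_t)VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT) |
               (root_hpa & EXAMPLE_PAGE_MASK);
}

int main(void)
{
        /* Hypothetical EPT root table at host physical address 0x12345000. */
        printf("eptp = 0x%llx\n", (unsigned long long)make_eptp(0x12345000ull));
        /* Prints eptp = 0x1234501e: bits 2:0 = 6 (write-back), bits 5:3 = 3 (four levels). */
        return 0;
}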
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "inst_emu", VCPU_STAT(emulated_inst_exits) },
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ NULL }
};

@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
break;

case BOOKE_INTERRUPT_FP_UNAVAIL:
kvmppc_queue_exception(vcpu, exit_nr);
r = RESUME_GUEST;
break;

case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;

@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)

int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
/* XXX implement me */
return 0;
return !!(v->arch.pending_exceptions);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return 1;
return !(v->arch.msr & MSR_WE);
}

@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;

kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);

if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int r;
sigset_t sigsaved;

vcpu_load(vcpu);

if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);

vcpu_put(vcpu);

return r;
}

int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);

if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}

return 0;
}

@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
}

#ifdef CONFIG_X86_LOCAL_APIC
static void kvm_setup_secondary_clock(void)
{
/*

@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
}
#endif

/*
* After the clock is registered, the host will keep writing to the

@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
#endif
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;

@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
* mode 1 is one shot, mode 2 is period, otherwise del timer */
switch (ps->channels[0].mode) {
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
create_pit_timer(&ps->pit_timer, val, 0);
break;
case 2:
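The i8254 change above ("KVM: PIT: support mode 4") treats mode 4, the 8254's software-triggered strobe, like mode 1: a single countdown rather than a periodic one, hence the shared one-shot timer path in the switch above. For illustration only, a guest (or a privileged process talking to real hardware) could program channel 0 into mode 4 roughly as follows; this sketch uses glibc's <sys/io.h> port I/O and is not taken from the patch.

#include <stdio.h>
#include <sys/io.h>     /* outb(), ioperm(); needs root */

int main(void)
{
        unsigned short count = 0x4a9;           /* arbitrary demo count, in ~1.19 MHz PIT ticks */

        if (ioperm(0x40, 4, 1)) {               /* request access to I/O ports 0x40-0x43 */
                perror("ioperm");
                return 1;
        }
        outb(0x38, 0x43);                       /* control word: channel 0, lo/hi byte, mode 4, binary */
        outb(count & 0xff, 0x40);               /* low byte of the one-shot count */
        outb(count >> 8, 0x40);                 /* high byte; the countdown starts after this write */
        return 0;
}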
@@ -79,36 +79,6 @@ static int dbg = 1;
}
#endif

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_SHIFT 63
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

@@ -154,10 +124,6 @@ static int dbg = 1;
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;

static u64 __read_mostly shadow_trap_nonpresent_pte;
static u64 __read_mostly shadow_notrap_nonpresent_pte;
static u64 __read_mostly shadow_base_present_pte;
static u64 __read_mostly shadow_nx_mask;
static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;

void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{

@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);

void kvm_mmu_set_base_ptes(u64 base_pte)
{
shadow_base_present_pte = base_pte;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);

void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr0 & X86_CR0_WP;

@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)

static int is_dirty_pte(unsigned long pte)
{
return pte & PT_DIRTY_MASK;
return pte & shadow_dirty_mask;
}

static int is_rmap_pte(u64 pte)

@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)

write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
*write_count += 1;
WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
}

static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)

@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
return;
sp = page_header(__pa(spte));
pfn = spte_to_pfn(*spte);
if (*spte & PT_ACCESSED_MASK)
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
kvm_release_pfn_dirty(pfn);

@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
* whether the guest actually used the pte (in order to detect
* demand paging).
*/
spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
spte = shadow_base_present_pte | shadow_dirty_mask;
if (!speculative)
pte_access |= PT_ACCESSED_MASK;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (!(pte_access & ACC_EXEC_MASK))
spte |= PT64_NX_MASK;

spte |= PT_PRESENT_MASK;
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
else
spte |= shadow_nx_mask;
if (pte_access & ACC_USER_MASK)
spte |= PT_USER_MASK;
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;

@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
return -ENOMEM;
}

table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
table[index] = __pa(new_table->spt)
| PT_PRESENT_MASK | PT_WRITABLE_MASK
| shadow_user_mask | shadow_x_mask;
}
table_addr = table[index] & PT64_BASE_ADDR_MASK;
}

@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
spin_lock(&vcpu->kvm->mmu_lock);
#ifdef CONFIG_X86_64
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;

@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
return;
}
#endif
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];

@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)

root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;

#ifdef CONFIG_X86_64
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;

@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.root_hpa = root;
return;
}
#endif
metaphysical = !is_paging(vcpu);
if (tdp_enabled)
metaphysical = 1;

@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
largepage, gfn, pfn, TDP_ROOT_LEVEL);
largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
spin_unlock(&vcpu->kvm->mmu_lock);

return r;

@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->page_fault = tdp_page_fault;
context->free = nonpaging_free;
context->prefetch_page = nonpaging_prefetch_page;
context->shadow_root_level = TDP_ROOT_LEVEL;
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->root_hpa = INVALID_PAGE;

if (!is_paging(vcpu)) {

@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
{
u64 *spte = vcpu->arch.last_pte_updated;

return !!(spte && (*spte & PT_ACCESSED_MASK));
return !!(spte && (*spte & shadow_accessed_mask));
}

static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,

@@ -3,11 +3,38 @@

#include <linux/kvm_host.h>

#ifdef CONFIG_X86_64
#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
#else
#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
#endif
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_SHIFT 63
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{

@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}

static int get_npt_level(void)
{
#ifdef CONFIG_X86_64
return PT64_ROOT_LEVEL;
#else
return PT32E_ROOT_LEVEL;
#endif
}

static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,

@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.inject_pending_vectors = do_interrupt_requests,

.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
};

static int __init svm_init(void)

@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
static int flexpriority_enabled = 1;
module_param(flexpriority_enabled, bool, 0);

static int enable_ept = 1;
module_param(enable_ept, bool, 0);

struct vmcs {
u32 revision_id;
u32 abort;

@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static int init_rmode_tss(struct kvm *kvm);
static int init_rmode(struct kvm *kvm);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);

@@ -107,6 +110,11 @@ static struct vmcs_config {
u32 vmentry_ctrl;
} vmcs_config;

struct vmx_capability {
u32 ept;
u32 vpid;
} vmx_capability;

#define VMX_SEGMENT_FIELD(seg) \
[VCPU_SREG_##seg] = { \
.selector = GUEST_##seg##_SELECTOR, \

@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}

static inline int cpu_has_vmx_invept_individual_addr(void)
{
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
}

static inline int cpu_has_vmx_invept_context(void)
{
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
}

static inline int cpu_has_vmx_invept_global(void)
{
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
}

static inline int cpu_has_vmx_ept(void)
{
return (vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_EPT);
}

static inline int vm_need_ept(void)
{
return (cpu_has_vmx_ept() && enable_ept);
}

static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
return ((cpu_has_vmx_virtualize_apic_accesses()) &&

@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
: : "a"(&operand), "c"(ext) : "cc", "memory");
}

static inline void __invept(int ext, u64 eptp, gpa_t gpa)
{
struct {
u64 eptp, gpa;
} operand = {eptp, gpa};

asm volatile (ASM_VMX_INVEPT
/* CF==1 or ZF==1 --> rc = -1 */
"; ja 1f ; ud2 ; 1:\n"
: : "a" (&operand), "c" (ext) : "cc", "memory");
}

static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
int i;

@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
}

static inline void ept_sync_global(void)
{
if (cpu_has_vmx_invept_global())
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
}

static inline void ept_sync_context(u64 eptp)
{
if (vm_need_ept()) {
if (cpu_has_vmx_invept_context())
__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
else
ept_sync_global();
}
}

static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
{
if (vm_need_ept()) {
if (cpu_has_vmx_invept_individual_addr())
__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
eptp, gpa);
else
ept_sync_context(eptp);
}
}

static unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;

@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb |= 1u << 1;
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
vmcs_write32(EXCEPTION_BITMAP, eb);
}

@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
{
u32 vmx_msr_low, vmx_msr_high;
u32 min, opt;
u32 min, opt, min2, opt2;
u32 _pin_based_exec_control = 0;
u32 _cpu_based_exec_control = 0;
u32 _cpu_based_2nd_exec_control = 0;

@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
#endif
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING;

@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
~CPU_BASED_CR8_STORE_EXITING;
#endif
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
min = 0;
opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
min2 = 0;
opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_ENABLE_VPID;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
return -EIO;
}

@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
#endif
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
/* CR3 accesses don't need to cause VM Exits when EPT enabled */
min &= ~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING);
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
&_cpu_based_exec_control) < 0)
return -EIO;
rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
vmx_capability.ept, vmx_capability.vpid);
}

min = 0;
#ifdef CONFIG_X86_64

@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);

kvm_mmu_reset_context(vcpu);
init_rmode_tss(vcpu->kvm);
init_rmode(vcpu->kvm);
}

#ifdef CONFIG_X86_64

@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
}

static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
return;
}
vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
}
}

static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);

static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_config.cpu_based_exec_ctrl |
(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, vcpu->arch.cr4);
*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
*hw_cr0 &= ~X86_CR0_WP;
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_config.cpu_based_exec_ctrl &
~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, vcpu->arch.cr4);
if (!(vcpu->arch.cr0 & X86_CR0_WP))
*hw_cr0 &= ~X86_CR0_WP;
}
}

static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
struct kvm_vcpu *vcpu)
{
if (!is_paging(vcpu)) {
*hw_cr4 &= ~X86_CR4_PAE;
*hw_cr4 |= X86_CR4_PSE;
} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
*hw_cr4 &= ~X86_CR4_PAE;
}

static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
KVM_VM_CR0_ALWAYS_ON;

vmx_fpu_deactivate(vcpu);

if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))

@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif

if (vm_need_ept())
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);

vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0,
(cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;

if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
vmx_fpu_activate(vcpu);
}

static u64 construct_eptp(unsigned long root_hpa)
{
u64 eptp;

/* TODO write the value reading from MSR */
eptp = VMX_EPT_DEFAULT_MT |
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
eptp |= (root_hpa & PAGE_MASK);

return eptp;
}

static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
unsigned long guest_cr3;
u64 eptp;

guest_cr3 = cr3;
if (vm_need_ept()) {
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
ept_sync_context(eptp);
ept_load_pdptrs(vcpu);
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
}

vmx_flush_tlb(vcpu);
vmcs_writel(GUEST_CR3, cr3);
vmcs_writel(GUEST_CR3, guest_cr3);
if (vcpu->arch.cr0 & X86_CR0_PE)
vmx_fpu_deactivate(vcpu);
}

static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);

vcpu->arch.cr4 = cr4;
if (vm_need_ept())
ept_update_paging_mode_cr4(&hw_cr4, vcpu);

vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, hw_cr4);
}

static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)

@@ -1530,6 +1707,41 @@ out:
return ret;
}

static int init_rmode_identity_map(struct kvm *kvm)
{
int i, r, ret;
pfn_t identity_map_pfn;
u32 tmp;

if (!vm_need_ept())
return 1;
if (unlikely(!kvm->arch.ept_identity_pagetable)) {
printk(KERN_ERR "EPT: identity-mapping pagetable "
"haven't been allocated!\n");
return 0;
}
if (likely(kvm->arch.ept_identity_pagetable_done))
return 1;
ret = 0;
identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
goto out;
/* Set up identity-mapping pagetable for EPT in real mode */
for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
r = kvm_write_guest_page(kvm, identity_map_pfn,
&tmp, i * sizeof(tmp), sizeof(tmp));
if (r < 0)
goto out;
}
kvm->arch.ept_identity_pagetable_done = true;
ret = 1;
out:
return ret;
}

static void seg_setup(int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];

@@ -1564,6 +1776,31 @@ out:
return r;
}

static int alloc_identity_pagetable(struct kvm *kvm)
{
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;

down_write(&kvm->slots_lock);
if (kvm->arch.ept_identity_pagetable)
goto out;
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
if (r)
goto out;

down_read(&current->mm->mmap_sem);
kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
up_read(&current->mm->mmap_sem);
out:
up_write(&kvm->slots_lock);
return r;
}

static void allocate_vpid(struct vcpu_vmx *vmx)
{
int vpid;

@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
CPU_BASED_CR8_LOAD_EXITING;
#endif
}
if (!vm_need_ept())
exec_control |= CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_CR3_LOAD_EXITING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);

if (cpu_has_secondary_exec_ctrls()) {

@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
if (!vm_need_ept())
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}

@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
return 0;
}

static int init_rmode(struct kvm *kvm)
{
if (!init_rmode_tss(kvm))
return 0;
if (!init_rmode_identity_map(kvm))
return 0;
return 1;
}

static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
int ret;

down_read(&vcpu->kvm->slots_lock);
if (!init_rmode_tss(vmx->vcpu.kvm)) {
if (!init_rmode(vmx->vcpu.kvm)) {
ret = -ENOMEM;
goto out;
}

@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
if (vm_need_ept())
BUG();
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2 >> 32), handler);

@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return kvm_task_switch(vcpu, tss_selector, reason);
}

static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
enum emulation_result er;
gpa_t gpa;
unsigned long hva;
int gla_validity;
int r;

exit_qualification = vmcs_read64(EXIT_QUALIFICATION);

if (exit_qualification & (1 << 6)) {
printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
return -ENOTSUPP;
}

gla_validity = (exit_qualification >> 7) & 0x3;
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
(long unsigned int)exit_qualification);
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
kvm_run->hw.hardware_exit_reason = 0;
return -ENOTSUPP;
}

gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
if (!kvm_is_error_hva(hva)) {
r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
if (r < 0) {
printk(KERN_ERR "EPT: Not enough memory!\n");
return -ENOMEM;
}
return 1;
} else {
/* must be MMIO */
er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);

if (er == EMULATE_FAIL) {
printk(KERN_ERR
"EPT: Fail to handle EPT violation vmexit!er is %d\n",
er);
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
(long unsigned int)exit_qualification);
return -ENOTSUPP;
} else if (er == EMULATE_DO_MMIO)
return 0;
}
return 1;
}

/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs

@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
};

static const int kvm_vmx_max_exit_handlers =

@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
(u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);

/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (vm_need_ept() && is_paging(vcpu)) {
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
ept_load_pdptrs(vcpu);
}

if (unlikely(vmx->fail)) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason

@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}

if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
exit_reason != EXIT_REASON_EXCEPTION_NMI)
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION))
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
"exit reason is 0x%x\n", __func__, exit_reason);
if (exit_reason < kvm_vmx_max_exit_handlers

@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
return ERR_PTR(-ENOMEM);

allocate_vpid(vmx);
if (id == 0 && vm_need_ept()) {
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
VMX_EPT_FAKE_DIRTY_MASK, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
}

err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
if (err)

@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (alloc_apic_access_page(kvm) != 0)
goto free_vmcs;

if (vm_need_ept())
if (alloc_identity_pagetable(kvm) != 0)
goto free_vmcs;

return &vmx->vcpu;

free_vmcs:

@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
}
}

static int get_ept_level(void)
{
return VMX_EPT_DEFAULT_GAW + 1;
}

static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,

@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.inject_pending_vectors = do_interrupt_requests,

.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
};

static int __init vmx_init(void)

@@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);

if (cpu_has_vmx_ept())
bypass_guest_pf = 0;

if (bypass_guest_pf)
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);

ept_sync_global();

return 0;

out2:

@@ -35,6 +35,8 @@
#define CPU_BASED_MWAIT_EXITING 0x00000400
#define CPU_BASED_RDPMC_EXITING 0x00000800
#define CPU_BASED_RDTSC_EXITING 0x00001000
#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
#define CPU_BASED_CR3_STORE_EXITING 0x00010000
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000

@@ -49,6 +51,7 @@
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040

@@ -100,10 +103,22 @@ enum vmcs_field {
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
APIC_ACCESS_ADDR_HIGH = 0x00002015,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
GUEST_PDPTR1_HIGH = 0x0000280d,
GUEST_PDPTR2 = 0x0000280e,
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,

@@ -226,6 +241,8 @@ enum vmcs_field {
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54

/*

@@ -316,15 +333,36 @@ enum vmcs_field {
#define MSR_IA32_VMX_CR4_FIXED1 0x489
#define MSR_IA32_VMX_VMCS_ENUM 0x48a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c

#define MSR_IA32_FEATURE_CONTROL 0x3a
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4

#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10

#define VMX_NR_VPIDS (1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
#define VMX_VPID_EXTENT_ALL_CONTEXT 2

#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
#define VMX_EPT_MT_EPTE_SHIFT 3
#define VMX_EPT_GAW_EPTP_SHIFT 3
#define VMX_EPT_DEFAULT_MT 0x6ull
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)

#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul

#endif

@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)

kvm_x86_ops = ops;
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
return 0;

out:

@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)

kvm_x86_ops->decache_regs(vcpu);

vcpu->arch.exception.pending = false;

vcpu_put(vcpu);

return 0;

@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
}

if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
cseg_desc.type &= ~(1 << 8); //clear the B flag
cseg_desc.type &= ~(1 << 1); //clear the B flag
save_guest_segment_descriptor(vcpu, tr_seg.selector,
&cseg_desc);
}

@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
}

if (reason != TASK_SWITCH_IRET) {
nseg_desc.type |= (1 << 8);
nseg_desc.type |= (1 << 1);
save_guest_segment_descriptor(vcpu, tss_selector,
&nseg_desc);
}

@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
{
unsigned after_mxcsr_mask;

/*
* Touch the fpu the first time in non atomic context as if
* this is the first fpu instruction the exception handler
* will fire before the instruction returns and it'll have to
* allocate ram with GFP_KERNEL.
*/
if (!used_math())
fx_save(&vcpu->arch.host_fx_image);

/* Initialize guest FPU by resetting ours and saving into guest's */
preempt_disable();
fx_save(&vcpu->arch.host_fx_image);
fpu_init();
fx_finit();
fx_save(&vcpu->arch.guest_fx_image);
fx_restore(&vcpu->arch.host_fx_image);
preempt_enable();

@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_physmem(kvm);
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
kfree(kvm);
}

@@ -1761,6 +1761,7 @@ twobyte_insn:
case 6: /* lmsw */
realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
&ctxt->eflags);
c->dst.type = OP_NONE;
break;
case 7: /* invlpg*/
emulate_invlpg(ctxt->vcpu, memop);

@@ -59,6 +59,7 @@ struct kvm_vcpu_stat {
u32 emulated_inst_exits;
u32 dec_exits;
u32 ext_intr_exits;
u32 halt_wakeup;
};

struct tlbe {

@@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
clear_bit(priority, &vcpu->arch.pending_exceptions);
}

/* Helper function for "full" MSR writes. No need to call this if only EE is
* changing. */
static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);

vcpu->arch.msr = new_msr;

if (vcpu->arch.msr & MSR_WE)
kvm_vcpu_block(vcpu);
}

#endif /* __POWERPC_KVM_PPC_H__ */

@@ -314,6 +314,9 @@ struct kvm_arch{
struct page *apic_access_page;

gpa_t wall_clock;

struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
};

struct kvm_vm_stat {

@@ -422,6 +425,7 @@ struct kvm_x86_ops {
struct kvm_run *run);

int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
};

extern struct kvm_x86_ops *kvm_x86_ops;

@@ -433,6 +437,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);

@@ -620,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image)
asm("fxrstor (%0)":: "r" (image));
}

static inline void fpu_init(void)
static inline void fx_finit(void)
{
asm("finit");
}

@@ -644,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"

#define MSR_IA32_TIME_STAMP_COUNTER 0x010

@@ -522,6 +522,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
return bad_hva();
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);

/*
* Requires current->mm->mmap_sem to be held