KVM: x86: retry non-page-table writing instructions
If the emulation is caused by a #PF and the instruction is not a page-table-writing instruction, the VM exit was caused by a write-protected shadow page, so we can zap the shadow page and retry the instruction directly. The idea is from Avi. Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
parent
d5ae7ce835
commit
1cb3f3ae5a
|
@ -364,6 +364,7 @@ enum x86_intercept {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
|
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
|
||||||
|
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
|
||||||
#define EMULATION_FAILED -1
|
#define EMULATION_FAILED -1
|
||||||
#define EMULATION_OK 0
|
#define EMULATION_OK 0
|
||||||
#define EMULATION_RESTART 1
|
#define EMULATION_RESTART 1
|
||||||
|
|
|
@ -444,6 +444,9 @@ struct kvm_vcpu_arch {
|
||||||
|
|
||||||
cpumask_var_t wbinvd_dirty_mask;
|
cpumask_var_t wbinvd_dirty_mask;
|
||||||
|
|
||||||
|
unsigned long last_retry_eip;
|
||||||
|
unsigned long last_retry_addr;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
bool halted;
|
bool halted;
|
||||||
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
|
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
|
||||||
|
@ -692,6 +695,7 @@ enum emulation_result {
|
||||||
#define EMULTYPE_NO_DECODE (1 << 0)
|
#define EMULTYPE_NO_DECODE (1 << 0)
|
||||||
#define EMULTYPE_TRAP_UD (1 << 1)
|
#define EMULTYPE_TRAP_UD (1 << 1)
|
||||||
#define EMULTYPE_SKIP (1 << 2)
|
#define EMULTYPE_SKIP (1 << 2)
|
||||||
|
#define EMULTYPE_RETRY (1 << 3)
|
||||||
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
|
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
|
||||||
int emulation_type, void *insn, int insn_len);
|
int emulation_type, void *insn, int insn_len);
|
||||||
|
|
||||||
|
@ -756,6 +760,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
|
||||||
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||||
const u8 *new, int bytes,
|
const u8 *new, int bytes,
|
||||||
bool guest_initiated);
|
bool guest_initiated);
|
||||||
|
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
|
||||||
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
|
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
|
||||||
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
|
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
|
||||||
int kvm_mmu_load(struct kvm_vcpu *vcpu);
|
int kvm_mmu_load(struct kvm_vcpu *vcpu);
|
||||||
|
|
|
@ -3702,6 +3702,11 @@ done:
|
||||||
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
|
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the instruction described by ctxt carries the PageTable
 * flag in its decode flags (ctxt->d). Returns true when the flag is set.
 * Callers use this to tell page-table-writing instructions apart from
 * ordinary writes.
 */
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
|
||||||
|
{
|
||||||
|
return ctxt->d & PageTable;
|
||||||
|
}
|
||||||
|
|
||||||
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
|
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
|
||||||
{
|
{
|
||||||
/* The second termination condition only applies for REPE
|
/* The second termination condition only applies for REPE
|
||||||
|
|
|
@ -1998,7 +1998,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
|
||||||
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
|
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||||
{
|
{
|
||||||
struct kvm_mmu_page *sp;
|
struct kvm_mmu_page *sp;
|
||||||
struct hlist_node *node;
|
struct hlist_node *node;
|
||||||
|
@ -2007,7 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||||
|
|
||||||
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
|
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
|
||||||
r = 0;
|
r = 0;
|
||||||
|
spin_lock(&kvm->mmu_lock);
|
||||||
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
|
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
|
||||||
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
|
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
|
||||||
sp->role.word);
|
sp->role.word);
|
||||||
|
@ -2015,8 +2015,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||||
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||||
}
|
}
|
||||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||||
|
spin_unlock(&kvm->mmu_lock);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
|
||||||
|
|
||||||
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
|
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
|
||||||
{
|
{
|
||||||
|
@ -3698,9 +3701,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
|
||||||
|
|
||||||
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
|
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
|
||||||
|
|
||||||
spin_lock(&vcpu->kvm->mmu_lock);
|
|
||||||
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
|
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
|
||||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
|
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
|
||||||
|
@ -3721,10 +3723,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
|
||||||
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
|
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Check whether the faulting address matches the vCPU's MMIO lookup.
 *
 * When the MMU is direct-mapped or nested, addr is passed to
 * vcpu_match_mmio_gpa(); otherwise it is passed to vcpu_match_mmio_gva().
 * Presumably the former treats addr as a guest-physical address and the
 * latter as a guest-virtual address -- confirm against the helpers.
 *
 * The page-fault path uses a true result to clear EMULTYPE_RETRY before
 * calling x86_emulate_instruction().
 */
static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
|
||||||
|
{
|
||||||
|
if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
|
||||||
|
return vcpu_match_mmio_gpa(vcpu, addr);
|
||||||
|
|
||||||
|
return vcpu_match_mmio_gva(vcpu, addr);
|
||||||
|
}
|
||||||
|
|
||||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
|
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
|
||||||
void *insn, int insn_len)
|
void *insn, int insn_len)
|
||||||
{
|
{
|
||||||
int r;
|
int r, emulation_type = EMULTYPE_RETRY;
|
||||||
enum emulation_result er;
|
enum emulation_result er;
|
||||||
|
|
||||||
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
|
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
|
||||||
|
@ -3736,7 +3746,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
|
if (is_mmio_page_fault(vcpu, cr2))
|
||||||
|
emulation_type = 0;
|
||||||
|
|
||||||
|
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
|
||||||
|
|
||||||
switch (er) {
|
switch (er) {
|
||||||
case EMULATE_DONE:
|
case EMULATE_DONE:
|
||||||
|
|
|
@ -4836,6 +4836,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Decide whether the faulting instruction should be re-executed by the
 * guest instead of being emulated.
 *
 * ctxt:           emulation context of the decoded instruction.
 * cr2:            faulting address reported with the page fault.
 * emulation_type: EMULTYPE_* flags; a retry is attempted only when
 *                 EMULTYPE_RETRY is set.
 *
 * Returns false (the caller goes on emulating) when EMULTYPE_RETRY is
 * not set, when x86_page_table_writing_insn(ctxt) is true, or when
 * ctxt->eip and cr2 equal the pair cached by the previous retry.
 * Otherwise records (ctxt->eip, cr2) in vcpu->arch, calls
 * kvm_mmu_unprotect_page() on the faulting gfn and returns true, in
 * which case x86_emulate_instruction() returns EMULATE_DONE.
 */
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
|
||||||
|
unsigned long cr2, int emulation_type)
|
||||||
|
{
|
||||||
|
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||||
|
unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
|
||||||
|
|
||||||
|
/* Snapshot the cached pair before it is cleared below. */
last_retry_eip = vcpu->arch.last_retry_eip;
|
||||||
|
last_retry_addr = vcpu->arch.last_retry_addr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the emulation is caused by #PF and it is non-page_table
|
||||||
|
* writing instruction, it means the VM-EXIT is caused by shadow
|
||||||
|
* page protected, we can zap the shadow page and retry this
|
||||||
|
* instruction directly.
|
||||||
|
*
|
||||||
|
* Note: if the guest uses a non-page-table modifying instruction
|
||||||
|
* on the PDE that points to the instruction, then we will unmap
|
||||||
|
* the instruction and go to an infinite loop. So, we cache the
|
||||||
|
* last retried eip and the last fault address, if we meet the eip
|
||||||
|
* and the address again, we can break out of the potential infinite
|
||||||
|
* loop.
|
||||||
|
*/
|
||||||
|
/*
 * Reset the cache unconditionally; it is set again further down only
 * when a retry is actually attempted.
 */
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
|
||||||
|
|
||||||
|
if (!(emulation_type & EMULTYPE_RETRY))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (x86_page_table_writing_insn(ctxt))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Same eip and fault address as the last retry: stop retrying. */
if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
vcpu->arch.last_retry_eip = ctxt->eip;
|
||||||
|
vcpu->arch.last_retry_addr = cr2;
|
||||||
|
|
||||||
|
/*
 * With a direct-mapped MMU cr2 is used as the gpa as-is; otherwise it
 * is translated first.
 * NOTE(review): gpa is declared unsigned long. If
 * kvm_mmu_gva_to_gpa_write() returns a wider gpa_t, the value could be
 * truncated on 32-bit hosts, and a failed translation is not checked
 * here -- confirm against the helper's definition.
 */
if (!vcpu->arch.mmu.direct_map)
|
||||||
|
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
|
||||||
|
|
||||||
|
/* The return value of the unprotect call is ignored. */
kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||||
unsigned long cr2,
|
unsigned long cr2,
|
||||||
int emulation_type,
|
int emulation_type,
|
||||||
|
@ -4877,6 +4921,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||||
return EMULATE_DONE;
|
return EMULATE_DONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (retry_instruction(ctxt, cr2, emulation_type))
|
||||||
|
return EMULATE_DONE;
|
||||||
|
|
||||||
/* this is needed for vmware backdoor interface to work since it
|
/* this is needed for vmware backdoor interface to work since it
|
||||||
changes registers values during IO operation */
|
changes registers values during IO operation */
|
||||||
if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
|
if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
|
||||||
|
|
Loading…
Reference in New Issue