Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "This is a pretty large update. I think it is roughly as big as what I
  usually had for the _whole_ rc period.

  There are a few bad bugs where the guest can OOPS or crash the host.
  We have also started looking at attack models for nested
  virtualization; bugs that usually result in the guest ring 0 crashing
  itself become more worrisome if you have nested virtualization,
  because the nested guest might bring down the non-nested guest as
  well. For current uses of nested virtualization these do not really
  have a security impact, but you never know and bugs are bugs
  nevertheless.

  A lot of these bugs are in 3.17 too, resulting in a large number of
  stable@ Ccs. I checked that all the patches apply there with no
  conflicts"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: vfio: fix unregister kvm_device_ops of vfio
  KVM: x86: Wrong assertion on paging_tmpl.h
  kvm: fix excessive pages un-pinning in kvm_iommu_map error path.
  KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag
  KVM: x86: Emulator does not decode clflush well
  KVM: emulate: avoid accessing NULL ctxt->memopp
  KVM: x86: Decoding guest instructions which cross page boundary may fail
  kvm: x86: don't kill guest on unknown exit reason
  kvm: vmx: handle invvpid vm exit gracefully
  KVM: x86: Handle errors when RIP is set during far jumps
  KVM: x86: Emulator fixes for eip canonical checks on near branches
  KVM: x86: Fix wrong masking on relative jump/call
  KVM: x86: Improve thread safety in pit
  KVM: x86: Prevent host from panicking on shared MSR writes.
  KVM: x86: Check non-canonical addresses upon WRMSR
commit 96971e9aa9
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
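The hunks above add a canonical-address helper (used by the WRMSR and emulator fixes later in this series) and let kvm_set_shared_msr() report failure. As a quick illustration of what the sign-extension trick tests, here is a minimal stand-alone user-space sketch, assuming 48-bit virtual addresses; it mirrors the helper's arithmetic but is not kernel code:

/* Illustrative only: canonical means bits 63:47 are a sign-extension of bit 47. */
#include <stdint.h>
#include <stdio.h>

static uint64_t canonicalize(uint64_t la)
{
	/* shift out the top 16 bits, then sign-extend them back in */
	return (uint64_t)((int64_t)(la << 16) >> 16);
}

static int is_noncanonical(uint64_t la)
{
	return canonicalize(la) != la;
}

int main(void)
{
	printf("%d\n", is_noncanonical(0x00007fffffffffffULL)); /* 0: canonical (top of lower half) */
	printf("%d\n", is_noncanonical(0x0000800000000000ULL)); /* 1: non-canonical hole */
	printf("%d\n", is_noncanonical(0xffff800000000000ULL)); /* 0: canonical (bottom of upper half) */
	return 0;
}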
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG 49
 #define EXIT_REASON_INVEPT 50
 #define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
 #define EXIT_REASON_WBINVD 54
 #define EXIT_REASON_XSETBV 55
 #define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD, "INVD" }, \
+	{ EXIT_REASON_INVVPID, "INVVPID" }, \
 	{ EXIT_REASON_INVPCID, "INVPCID" }
 
 #endif /* _UAPIVMX_H */
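The two hunks above only define the INVVPID exit reason (53) and add it to the tracing string list. A hypothetical user-space sketch of how such a reason-to-name table can be consumed; the two values mirror the ones above, but the lookup helper and table layout are illustrative stand-ins, not the kernel's:

#include <stddef.h>
#include <stdio.h>

#define EXIT_REASON_INVEPT  50
#define EXIT_REASON_INVVPID 53

struct exit_reason_name { int reason; const char *name; };

static const struct exit_reason_name names[] = {
	{ EXIT_REASON_INVEPT,  "INVEPT"  },
	{ EXIT_REASON_INVVPID, "INVVPID" },
};

static const char *exit_reason_name(int reason)
{
	for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		if (names[i].reason == reason)
			return names[i].name;
	return "UNKNOWN";
}

int main(void)
{
	printf("%s\n", exit_reason_name(53)); /* prints INVVPID */
	return 0;
}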
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
 
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
+	rc = assign_eip_near(ctxt, eip);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
-	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+	int rc = X86EMUL_CONTINUE;
 
-	return X86EMUL_CONTINUE;
+	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
+		rc = jmp_rel(ctxt, ctxt->src.val);
+
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };
 
+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
@@ -4726,7 +4850,7 @@ special_insn:
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
 	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
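The emulator hunks above (apparently from the x86 instruction emulator) change the branch helpers so that assigning a new RIP can fail: a 64-bit far or near branch to a non-canonical target now raises #GP in the guest instead of silently loading a bad RIP that would later crash the host on VM entry. A minimal stand-alone sketch of that rc-propagation pattern, with simplified names and a simplified mode check (not the emulator's actual API):

#include <stdint.h>
#include <stdio.h>

#define EMUL_CONTINUE 0
#define EMUL_FAULT_GP 1

/* assumed stand-in for the emulator context */
struct emu_ctxt { uint64_t eip; int op_bytes; int long_mode; };

static int is_noncanonical(uint64_t la)
{
	return (uint64_t)((int64_t)(la << 16) >> 16) != la;
}

static int assign_eip(struct emu_ctxt *ctxt, uint64_t dst)
{
	switch (ctxt->op_bytes) {
	case 2: ctxt->eip = (uint16_t)dst; break;
	case 4: ctxt->eip = (uint32_t)dst; break;
	case 8:
		if (ctxt->long_mode && is_noncanonical(dst))
			return EMUL_FAULT_GP;	/* fail the instruction, keep old RIP */
		ctxt->eip = dst;
		break;
	}
	return EMUL_CONTINUE;
}

static int jmp_rel(struct emu_ctxt *ctxt, int64_t rel)
{
	return assign_eip(ctxt, ctxt->eip + rel);	/* caller must check the result */
}

int main(void)
{
	struct emu_ctxt c = { .eip = 0x7ffffffffff0, .op_bytes = 8, .long_mode = 1 };

	/* crosses into the non-canonical hole -> rc=1, eip left unchanged */
	printf("rc=%d eip=%#llx\n", jmp_rel(&c, 0x40), (unsigned long long)c.eip);
	return 0;
}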
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;
 
 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
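The PIT hunk above takes pit_state.lock around cancelling and re-arming the timer, so a migration cannot race with concurrent reprogramming of the same timer. A toy pthread-based sketch of that discipline (user-space stand-ins, not the kernel hrtimer/mutex API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pit_state {
	pthread_mutex_t lock;
	bool armed;			/* stand-in for the pending timer */
};

static void migrate_timer(struct pit_state *ps)
{
	pthread_mutex_lock(&ps->lock);
	if (ps->armed) {		/* "hrtimer_cancel() returned true" */
		ps->armed = false;
		/* ... move the timer to the new CPU ... */
		ps->armed = true;	/* "hrtimer_start_expires()" */
	}
	pthread_mutex_unlock(&ps->lock);
}

int main(void)
{
	struct pit_state ps = { PTHREAD_MUTEX_INITIALIZER, true };

	migrate_timer(&ps);
	printf("armed=%d\n", ps.armed);
	return 0;
}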
@@ -298,7 +298,7 @@ retry_walk:
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
 	accessed_dirty = PT_GUEST_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
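The assertion rewrite above ("Wrong assertion on paging_tmpl.h") changes the condition from "not long mode and PAE" to the intended invariant "long mode implies PAE". A quick stand-alone truth-table check of the two expressions:

#include <stdio.h>

int main(void)
{
	/* old = !lm && pae, new = !(lm && !pae); only "new" encodes lm -> pae */
	for (int lm = 0; lm <= 1; lm++)
		for (int pae = 0; pae <= 1; pae++)
			printf("long_mode=%d pae=%d old=%d new=%d\n",
			       lm, pae, !lm && pae, !(lm && !pae));
	return 0;
}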
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
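The handle_exit() change above stops "killing" the guest on an exit code the host has no handler for: instead of reporting KVM_EXIT_UNKNOWN to userspace, it warns once and injects #UD, so the guest takes a fault it can cope with. A minimal sketch of that dispatch policy (table, handler names, and return codes are illustrative stand-ins):

#include <stdio.h>

typedef int (*exit_handler_t)(void);

static int handle_hlt(void) { return 1; }

static exit_handler_t handlers[] = { [0x0c] = handle_hlt };

static int dispatch(unsigned int exit_code)
{
	if (exit_code >= sizeof(handlers) / sizeof(handlers[0]) ||
	    !handlers[exit_code]) {
		fprintf(stderr, "unexpected exit reason 0x%x\n", exit_code);
		return 1;	/* keep running, e.g. after injecting #UD into the guest */
	}
	return handlers[exit_code]();
}

int main(void)
{
	printf("%d\n", dispatch(0x0c));	/* handled */
	printf("%d\n", dispatch(0x7f));	/* unknown, but no longer fatal */
	return 0;
}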
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
-						   msr->mask);
+				ret = kvm_set_shared_msr(msr->index, msr->data,
+							 msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
 	[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
 	[EXIT_REASON_INVEPT] = handle_invept,
+	[EXIT_REASON_INVVPID] = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-	case EXIT_REASON_INVEPT:
+	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
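The vmx_set_msr() hunk above turns the shared-MSR write into a fallible operation and rolls the cached value back if the hardware write is refused, so the software copy never disagrees with the MSR. A small sketch of that rollback pattern (the write function and struct are stand-ins, not KVM code):

#include <stdint.h>
#include <stdio.h>

struct shadow_msr { uint64_t data; };

/* stand-in for kvm_set_shared_msr(), which can now fail */
static int write_hw_msr(uint64_t data)
{
	return data == 0xbad ? 1 : 0;
}

static int set_shadowed_msr(struct shadow_msr *msr, uint64_t data)
{
	uint64_t old_msr_data = msr->data;
	int ret;

	msr->data = data;
	ret = write_hw_msr(data);
	if (ret)
		msr->data = old_msr_data;	/* keep the cache consistent on failure */
	return ret;
}

int main(void)
{
	struct shadow_msr m = { .data = 1 };

	printf("rc=%d data=%llu\n", set_shadowed_msr(&m, 0xbad),
	       (unsigned long long)m.data);	/* rc=1, data restored to 1 */
	return 0;
}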
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;
 
 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD. Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
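The kvm_set_msr() filtering above rejects non-canonical writes to the base MSRs (which would otherwise break the host on VM entry) and canonicalizes the SYSENTER MSRs so the guest sees deterministic behaviour. A hypothetical user-space model of that policy; the MSR numbers are the architectural ones, everything else is a stand-in:

#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_SYSENTER_EIP 0x176
#define MSR_LSTAR             0xc0000082

static uint64_t canonicalize(uint64_t v)
{
	return (uint64_t)((int64_t)(v << 16) >> 16);
}

static int set_msr(uint32_t index, uint64_t *data)
{
	switch (index) {
	case MSR_LSTAR:
		if (canonicalize(*data) != *data)
			return 1;		/* refuse: would #GP or break vmentry */
		break;
	case MSR_IA32_SYSENTER_EIP:
		*data = canonicalize(*data);	/* make the write deterministic */
		break;
	}
	return 0;
}

int main(void)
{
	uint64_t v = 0x1234800000000000ULL;	/* non-canonical */

	printf("LSTAR rc=%d\n", set_msr(MSR_LSTAR, &v));	/* rc=1 */
	set_msr(MSR_IA32_SYSENTER_EIP, &v);
	printf("SYSENTER_EIP value=%#llx\n", (unsigned long long)v);
	return 0;
}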
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
 static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-			   unsigned long size)
+			   unsigned long npages)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
 	pfn = gfn_to_pfn_memslot(slot, gfn);
-	end_gfn = gfn + (size >> PAGE_SHIFT);
+	end_gfn = gfn + npages;
 	gfn += 1;
 
 	if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	 * Pin all pages we are about to map in memory. This is
 	 * important because we unmap and unpin in 4kb steps later.
 	 */
-	pfn = kvm_pin_pages(slot, gfn, page_size);
+	pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
 	if (is_error_noslot_pfn(pfn)) {
 		gfn += 1;
 		continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	if (r) {
 		printk(KERN_ERR "kvm_iommu_map_address:"
 		       "iommu failed to map pfn=%llx\n", pfn);
-		kvm_unpin_pages(kvm, pfn, page_size);
+		kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
 		goto unmap_pages;
 	}
 
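The iommu hunks above fix a unit mismatch: the pinning helpers now consistently take a page count, so callers convert byte sizes with ">> PAGE_SHIFT"; passing bytes where pages are expected is what made the error path unpin far too much. A toy illustration of the magnitude of that confusion (numbers and helper are illustrative only):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long pages_to_unpin(unsigned long npages)
{
	return npages;	/* the helper trusts its caller's unit */
}

int main(void)
{
	unsigned long page_size = 2UL << 20;	/* a 2MiB mapping, in bytes */

	printf("buggy: %lu\n", pages_to_unpin(page_size));		  /* 2097152 "pages" */
	printf("fixed: %lu\n", pages_to_unpin(page_size >> PAGE_SHIFT));  /* 512 pages */
	return 0;
}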
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
 	return 0;
 }
 
+void kvm_unregister_device_ops(u32 type)
+{
+	if (kvm_device_ops_table[type] != NULL)
+		kvm_device_ops_table[type] = NULL;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
 	kvm_arch_exit();
 	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
+
+void kvm_vfio_ops_exit(void)
+{
+	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
@@ -3,11 +3,15 @@
 
 #ifdef CONFIG_KVM_VFIO
 int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
#else
 static inline int kvm_vfio_ops_init(void)
 {
 	return 0;
 }
+static inline void kvm_vfio_ops_exit(void)
+{
+}
 #endif
 
 #endif
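The vfio/kvm_main hunks above pair the existing registration of the VFIO device ops with an unregister hook called from kvm_exit(), so reloading the module does not trip over a stale table entry. A small sketch of that init/exit symmetry (names, table layout, and error codes are illustrative, not the kernel API):

#include <stdio.h>

#define MAX_DEVICE_TYPES 8

static const char *device_ops_table[MAX_DEVICE_TYPES];

static int register_device_ops(const char *ops, unsigned type)
{
	if (type >= MAX_DEVICE_TYPES || device_ops_table[type])
		return -1;		/* slot out of range or already taken */
	device_ops_table[type] = ops;
	return 0;
}

static void unregister_device_ops(unsigned type)
{
	if (type < MAX_DEVICE_TYPES)
		device_ops_table[type] = NULL;
}

int main(void)
{
	register_device_ops("vfio", 3);		/* module init */
	unregister_device_ops(3);		/* module exit: slot reusable again */
	printf("%s\n", register_device_ops("vfio", 3) == 0 ? "re-registered" : "stale");
	return 0;
}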