KVM: x86: Don't let userspace set host-reserved cr4 bits
Calculate the host-reserved cr4 bits at runtime based on the system's capabilities (using logic similar to __do_cpuid_func()), and use the dynamically generated mask for the reserved bit check in kvm_set_cr4() instead using of the static CR4_RESERVED_BITS define. This prevents userspace from "enabling" features in cr4 that are not supported by the system, e.g. by ignoring KVM_GET_SUPPORTED_CPUID and specifying a bogus CPUID for the vCPU. Allowing userspace to set unsupported bits in cr4 can lead to a variety of undesirable behavior, e.g. failed VM-Enter, and in general increases KVM's attack surface. A crafty userspace can even abuse CR4.LA57 to induce an unchecked #GP on a WRMSR. On a platform without LA57 support: KVM_SET_CPUID2 // CPUID_7_0_ECX.LA57 = 1 KVM_SET_SREGS // CR4.LA57 = 1 KVM_SET_MSRS // KERNEL_GS_BASE = 0x0004000000000000 KVM_RUN leads to a #GP when writing KERNEL_GS_BASE into hardware: unchecked MSR access error: WRMSR to 0xc0000102 (tried to write 0x0004000000000000) at rIP: 0xffffffffa00f239a (vmx_prepare_switch_to_guest+0x10a/0x1d0 [kvm_intel]) Call Trace: kvm_arch_vcpu_ioctl_run+0x671/0x1c70 [kvm] kvm_vcpu_ioctl+0x36b/0x5d0 [kvm] do_vfs_ioctl+0xa1/0x620 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4c/0x170 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fc08133bf47 Note, the above sequence fails VM-Enter due to invalid guest state. Userspace can allow VM-Enter to succeed (after the WRMSR #GP) by adding a KVM_SET_SREGS w/ CR4.LA57=0 after KVM_SET_MSRS, in which case KVM will technically leak the host's KERNEL_GS_BASE into the guest. But, as KERNEL_GS_BASE is a userspace-defined value/address, the leak is largely benign as a malicious userspace would simply be exposing its own data to the guest, and attacking a benevolent userspace would require multiple bugs in the userspace VMM. Cc: stable@vger.kernel.org Cc: Jun Nakajima <jun.nakajima@intel.com> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
e348ac7c9e
commit
b11306b53b
|
@ -93,6 +93,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
|
|||
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
|
||||
#endif
|
||||
|
||||
static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
|
||||
|
||||
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
|
||||
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
|
||||
|
||||
|
@ -879,9 +881,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_xcr);
|
||||
|
||||
static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 reserved_bits = CR4_RESERVED_BITS;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_XSAVE))
|
||||
reserved_bits |= X86_CR4_OSXSAVE;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_SMEP))
|
||||
reserved_bits |= X86_CR4_SMEP;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_SMAP))
|
||||
reserved_bits |= X86_CR4_SMAP;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_FSGSBASE))
|
||||
reserved_bits |= X86_CR4_FSGSBASE;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_PKU))
|
||||
reserved_bits |= X86_CR4_PKE;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_LA57) &&
|
||||
!(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57)))
|
||||
reserved_bits |= X86_CR4_LA57;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated())
|
||||
reserved_bits |= X86_CR4_UMIP;
|
||||
|
||||
return reserved_bits;
|
||||
}
|
||||
|
||||
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
{
|
||||
if (cr4 & CR4_RESERVED_BITS)
|
||||
if (cr4 & cr4_reserved_bits)
|
||||
return -EINVAL;
|
||||
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
|
||||
|
@ -9400,6 +9431,8 @@ int kvm_arch_hardware_setup(void)
|
|||
if (r != 0)
|
||||
return r;
|
||||
|
||||
cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
|
||||
|
||||
if (kvm_has_tsc_control) {
|
||||
/*
|
||||
* Make sure the user can only configure tsc_khz values that
|
||||
|
|
Loading…
Reference in New Issue