Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  KVM: Use new smp_call_function_mask() in kvm_flush_remote_tlbs()
  sched: don't clear PF_VCPU in scheduler
  KVM: Improve local apic timer wraparound handling
  KVM: Fix local apic timer divide by zero
  KVM: Move kvm_guest_exit() after local_irq_enable()
  KVM: x86 emulator: fix access registers for instructions with ModR/M byte and Mod = 3
  KVM: VMX: Force vm86 mode if setting flags during real mode
  KVM: x86 emulator: implement 'movnti mem, reg'
  KVM: VMX: Reset mmu context when entering real mode
  KVM: VMX: Handle NMIs before enabling interrupts and preemption
  KVM: MMU: Set shadow pte atomically in mmu_pte_write_zap_pte()
  KVM: x86 emulator: fix repne/repnz decoding
  KVM: x86 emulator: fix merge screwup due to emulator split
This commit is contained in:
Linus Torvalds 2007-10-22 19:24:17 -07:00
commit 0fd56c7033
6 changed files with 103 additions and 69 deletions

View File

@ -198,21 +198,15 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
static void ack_flush(void *_completed) static void ack_flush(void *_completed)
{ {
atomic_t *completed = _completed;
atomic_inc(completed);
} }
void kvm_flush_remote_tlbs(struct kvm *kvm) void kvm_flush_remote_tlbs(struct kvm *kvm)
{ {
int i, cpu, needed; int i, cpu;
cpumask_t cpus; cpumask_t cpus;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
atomic_t completed;
atomic_set(&completed, 0);
cpus_clear(cpus); cpus_clear(cpus);
needed = 0;
for (i = 0; i < KVM_MAX_VCPUS; ++i) { for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i]; vcpu = kvm->vcpus[i];
if (!vcpu) if (!vcpu)
@ -221,23 +215,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
continue; continue;
cpu = vcpu->cpu; cpu = vcpu->cpu;
if (cpu != -1 && cpu != raw_smp_processor_id()) if (cpu != -1 && cpu != raw_smp_processor_id())
if (!cpu_isset(cpu, cpus)) { cpu_set(cpu, cpus);
cpu_set(cpu, cpus);
++needed;
}
}
/*
* We really want smp_call_function_mask() here. But that's not
* available, so ipi all cpus in parallel and wait for them
* to complete.
*/
for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
while (atomic_read(&completed) != needed) {
cpu_relax();
barrier();
} }
smp_call_function_mask(cpus, ack_flush, NULL, 1);
} }
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@ -2054,12 +2034,21 @@ again:
kvm_x86_ops->run(vcpu, kvm_run); kvm_x86_ops->run(vcpu, kvm_run);
kvm_guest_exit();
vcpu->guest_mode = 0; vcpu->guest_mode = 0;
local_irq_enable(); local_irq_enable();
++vcpu->stat.exits; ++vcpu->stat.exits;
/*
* We must have an instruction between local_irq_enable() and
* kvm_guest_exit(), so the timer interrupt isn't delayed by
* the interrupt shadow. The stat.exits increment will do nicely.
* But we need to prevent reordering, hence this barrier():
*/
barrier();
kvm_guest_exit();
preempt_enable(); preempt_enable();
/* /*

View File

@ -494,12 +494,19 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic)
{ {
u32 counter_passed; u64 counter_passed;
ktime_t passed, now = apic->timer.dev.base->get_time(); ktime_t passed, now;
u32 tmcct = apic_get_reg(apic, APIC_TMICT); u32 tmcct;
ASSERT(apic != NULL); ASSERT(apic != NULL);
now = apic->timer.dev.base->get_time();
tmcct = apic_get_reg(apic, APIC_TMICT);
/* if initial count is 0, current count should also be 0 */
if (tmcct == 0)
return 0;
if (unlikely(ktime_to_ns(now) <= if (unlikely(ktime_to_ns(now) <=
ktime_to_ns(apic->timer.last_update))) { ktime_to_ns(apic->timer.last_update))) {
/* Wrap around */ /* Wrap around */
@ -514,15 +521,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
counter_passed = div64_64(ktime_to_ns(passed), counter_passed = div64_64(ktime_to_ns(passed),
(APIC_BUS_CYCLE_NS * apic->timer.divide_count)); (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
tmcct -= counter_passed;
if (tmcct <= 0) { if (counter_passed > tmcct) {
if (unlikely(!apic_lvtt_period(apic))) if (unlikely(!apic_lvtt_period(apic))) {
/* one-shot timers stick at 0 until reset */
tmcct = 0; tmcct = 0;
else } else {
do { /*
tmcct += apic_get_reg(apic, APIC_TMICT); * periodic timers reset to APIC_TMICT when they
} while (tmcct <= 0); * hit 0. The while loop simulates this happening N
* times. (counter_passed %= tmcct) would also work,
* but might be slower or not work on 32-bit??
*/
while (counter_passed > tmcct)
counter_passed -= tmcct;
tmcct -= counter_passed;
}
} else {
tmcct -= counter_passed;
} }
return tmcct; return tmcct;
@ -853,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
} }
apic->timer.divide_count = 0; update_divide_count(apic);
atomic_set(&apic->timer.pending, 0); atomic_set(&apic->timer.pending, 0);
if (vcpu->vcpu_id == 0) if (vcpu->vcpu_id == 0)
vcpu->apic_base |= MSR_IA32_APICBASE_BSP; vcpu->apic_base |= MSR_IA32_APICBASE_BSP;

View File

@ -1049,6 +1049,7 @@ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
destroy_kvm_mmu(vcpu); destroy_kvm_mmu(vcpu);
return init_kvm_mmu(vcpu); return init_kvm_mmu(vcpu);
} }
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
int kvm_mmu_load(struct kvm_vcpu *vcpu) int kvm_mmu_load(struct kvm_vcpu *vcpu)
{ {
@ -1088,7 +1089,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
mmu_page_remove_parent_pte(child, spte); mmu_page_remove_parent_pte(child, spte);
} }
} }
*spte = 0; set_shadow_pte(spte, 0);
kvm_flush_remote_tlbs(vcpu->kvm); kvm_flush_remote_tlbs(vcpu->kvm);
} }

View File

@ -523,6 +523,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{ {
if (vcpu->rmode.active)
rflags |= IOPL_MASK | X86_EFLAGS_VM;
vmcs_writel(GUEST_RFLAGS, rflags); vmcs_writel(GUEST_RFLAGS, rflags);
} }
@ -1128,6 +1130,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
kvm_mmu_reset_context(vcpu);
init_rmode_tss(vcpu->kvm); init_rmode_tss(vcpu->kvm);
} }
@ -1760,10 +1763,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
} }
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
asm ("int $2"); return 1; /* already handled by vmx_vcpu_run() */
return 1;
}
if (is_no_device(intr_info)) { if (is_no_device(intr_info)) {
vmx_fpu_activate(vcpu); vmx_fpu_activate(vcpu);
@ -2196,6 +2197,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
/* /*
* Loading guest fpu may have cleared host cr0.ts * Loading guest fpu may have cleared host cr0.ts
@ -2322,6 +2324,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1; vmx->launched = 1;
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
asm("int $2");
} }
static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,

View File

@ -212,7 +212,8 @@ static u16 twobyte_table[256] = {
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov, DstReg | SrcMem16 | ModRM | Mov,
/* 0xC0 - 0xCF */ /* 0xC0 - 0xCF */
0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xDF */ /* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xEF */ /* 0xE0 - 0xEF */
@ -596,11 +597,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xf0: /* LOCK */ case 0xf0: /* LOCK */
lock_prefix = 1; lock_prefix = 1;
break; break;
case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP/REPE/REPZ */ case 0xf3: /* REP/REPE/REPZ */
rep_prefix = 1; rep_prefix = 1;
break; break;
case 0xf2: /* REPNE/REPNZ */
break;
default: default:
goto done_prefixes; goto done_prefixes;
} }
@ -825,6 +825,14 @@ done_prefixes:
if (twobyte && b == 0x01 && modrm_reg == 7) if (twobyte && b == 0x01 && modrm_reg == 7)
break; break;
srcmem_common: srcmem_common:
/*
* For instructions with a ModR/M byte, switch to register
* access if Mod = 3.
*/
if ((d & ModRM) && modrm_mod == 3) {
src.type = OP_REG;
break;
}
src.type = OP_MEM; src.type = OP_MEM;
src.ptr = (unsigned long *)cr2; src.ptr = (unsigned long *)cr2;
src.val = 0; src.val = 0;
@ -893,6 +901,14 @@ done_prefixes:
dst.ptr = (unsigned long *)cr2; dst.ptr = (unsigned long *)cr2;
dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.val = 0; dst.val = 0;
/*
* For instructions with a ModR/M byte, switch to register
* access if Mod = 3.
*/
if ((d & ModRM) && modrm_mod == 3) {
dst.type = OP_REG;
break;
}
if (d & BitOp) { if (d & BitOp) {
unsigned long mask = ~(dst.bytes * 8 - 1); unsigned long mask = ~(dst.bytes * 8 - 1);
@ -1083,31 +1099,6 @@ push:
case 0xd2 ... 0xd3: /* Grp2 */ case 0xd2 ... 0xd3: /* Grp2 */
src.val = _regs[VCPU_REGS_RCX]; src.val = _regs[VCPU_REGS_RCX];
goto grp2; goto grp2;
case 0xe8: /* call (near) */ {
long int rel;
switch (op_bytes) {
case 2:
rel = insn_fetch(s16, 2, _eip);
break;
case 4:
rel = insn_fetch(s32, 4, _eip);
break;
case 8:
rel = insn_fetch(s64, 8, _eip);
break;
default:
DPRINTF("Call: Invalid op_bytes\n");
goto cannot_emulate;
}
src.val = (unsigned long) _eip;
JMP_REL(rel);
goto push;
}
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
JMP_REL(src.val);
no_wb = 1; /* Disable writeback. */
break;
case 0xf6 ... 0xf7: /* Grp3 */ case 0xf6 ... 0xf7: /* Grp3 */
switch (modrm_reg) { switch (modrm_reg) {
case 0 ... 1: /* test */ case 0 ... 1: /* test */
@ -1350,6 +1341,32 @@ special_insn:
case 0xae ... 0xaf: /* scas */ case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n"); DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate; goto cannot_emulate;
case 0xe8: /* call (near) */ {
long int rel;
switch (op_bytes) {
case 2:
rel = insn_fetch(s16, 2, _eip);
break;
case 4:
rel = insn_fetch(s32, 4, _eip);
break;
case 8:
rel = insn_fetch(s64, 8, _eip);
break;
default:
DPRINTF("Call: Invalid op_bytes\n");
goto cannot_emulate;
}
src.val = (unsigned long) _eip;
JMP_REL(rel);
goto push;
}
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
JMP_REL(src.val);
no_wb = 1; /* Disable writeback. */
break;
} }
goto writeback; goto writeback;
@ -1501,6 +1518,10 @@ twobyte_insn:
dst.bytes = op_bytes; dst.bytes = op_bytes;
dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
break; break;
case 0xc3: /* movnti */
dst.bytes = op_bytes;
dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
break;
} }
goto writeback; goto writeback;

View File

@ -3375,7 +3375,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
if (p->flags & PF_VCPU) { if (p->flags & PF_VCPU) {
account_guest_time(p, cputime); account_guest_time(p, cputime);
p->flags &= ~PF_VCPU;
return; return;
} }