2013-01-24 02:21:58 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
* Author: Marc Zyngier <marc.zyngier@arm.com>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __ASM_ARM_KVM_ARCH_TIMER_H
|
|
|
|
#define __ASM_ARM_KVM_ARCH_TIMER_H
|
|
|
|
|
|
|
|
#include <linux/clocksource.h>
|
|
|
|
#include <linux/hrtimer.h>
|
|
|
|
#include <linux/workqueue.h>
|
|
|
|
|
|
|
|
/*
 * Timer state made up of an enable flag and a virtual offset.
 * NOTE(review): presumably one instance per VM -- confirm against the
 * code that embeds this structure.
 */
struct arch_timer_kvm {
|
|
|
|
/* Whether the timer is enabled */
|
|
|
|
bool enabled;
|
|
|
|
|
|
|
|
/* Virtual offset (presumably mirrors the CNTVOFF register -- confirm) */
|
|
|
|
cycle_t cntvoff;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Timer state holding the saved virtual timer registers followed by
 * host-side bookkeeping (background hrtimer, work item, IRQ, VGIC mapping).
 * The two register fields come first; per the comment inside the structure,
 * everything after them is not used directly from assembly code.
 * NOTE(review): presumably one instance per vcpu, and assembly presumably
 * depends on the offsets of cntv_ctl/cntv_cval -- confirm before reordering.
 */
struct arch_timer_cpu {
|
|
|
|
/* Registers: control register, timer value */
|
|
|
|
u32 cntv_ctl; /* Saved/restored */
|
|
|
|
cycle_t cntv_cval; /* Saved/restored */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Anything that is not used directly from assembly code goes
|
|
|
|
* here.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Background timer used when the guest is not running */
|
|
|
|
struct hrtimer timer;
|
|
|
|
|
|
|
|
/* Work queued when the above timer expires */
|
|
|
|
struct work_struct expired;
|
|
|
|
|
|
|
|
/* True while the background timer is active */
|
|
|
|
bool armed;
|
|
|
|
|
|
|
|
/*
 * Timer IRQ.
 * NOTE(review): presumably also carries the current line level used for
 * level-triggered semantics -- confirm against struct kvm_irq_level.
 */
|
arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
The arch timer currently uses edge-triggered semantics in the sense that
the line is never sampled by the vgic and lowering the line from the
timer to the vgic doesn't have any effect on the pending state of
virtual interrupts in the vgic. This means that we do not support a
guest with the otherwise valid behavior of (1) disable interrupts (2)
enable the timer (3) disable the timer (4) enable interrupts. Such a
guest would validly not expect to see any interrupts on real hardware,
but will see interrupts on KVM.
This patch fixes this shortcoming through the following series of
changes.
First, we change the flow of the timer/vgic sync/flush operations. Now
the timer is always flushed/synced before the vgic, because the vgic
samples the state of the timer output. This has the implication that we
move the timer operations in to non-preempible sections, but that is
fine after the previous commit getting rid of hrtimer schedules on every
entry/exit.
Second, we change the internal behavior of the timer, letting the timer
keep track of its previous output state, and only lower/raise the line
to the vgic when the state changes. Note that in theory this could have
been accomplished more simply by signalling the vgic every time the
state *potentially* changed, but we don't want to be hitting the vgic
more often than necessary.
Third, we get rid of the use of the map->active field in the vgic and
instead simply set the interrupt as active on the physical distributor
whenever the input to the GIC is asserted and conversely clear the
physical active state when the input to the GIC is deasserted.
Fourth, and finally, we now initialize the timer PPIs (and all the other
unused PPIs for now), to be level-triggered, and modify the sync code to
sample the line state on HW sync and re-inject a new interrupt if it is
still pending at that time.
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2015-08-30 21:01:27 +08:00
|
|
|
struct kvm_irq_level irq;
|
2014-06-23 20:59:13 +08:00
|
|
|
|
|
|
|
|
/* VGIC mapping (pointer to a struct irq_phys_map defined elsewhere) */
|
|
|
|
struct irq_phys_map *map;
|
2013-01-24 02:21:58 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Timer initialisation that takes no VM or vcpu argument and returns an int.
 * NOTE(review): presumably 0 on success and a negative errno on failure --
 * confirm against the implementation.
 */
int kvm_timer_hyp_init(void);
|
2014-12-13 04:19:23 +08:00
|
|
|
/*
 * Per-VM hook taking the VM's struct kvm.
 * NOTE(review): presumably marks the VM's timer as enabled -- confirm.
 */
void kvm_timer_enable(struct kvm *kvm);
|
|
|
|
/* Per-VM timer initialisation hook taking the VM's struct kvm. */
void kvm_timer_init(struct kvm *kvm);
|
2014-06-23 20:59:13 +08:00
|
|
|
/*
 * Reset hook for a vcpu's timer. The caller supplies the timer IRQ
 * description through @irq, which is not modified (const).
 * NOTE(review): presumably returns 0 on success or a negative errno --
 * confirm against the implementation.
 */
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
|
|
|
const struct kvm_irq_level *irq);
|
2013-01-24 02:21:58 +08:00
|
|
|
/* Per-vcpu timer initialisation hook. */
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
|
|
|
|
/*
 * Flush the timer state for @vcpu.
 * NOTE(review): presumably called on guest entry, before the matching vgic
 * flush and from a non-preemptible section -- confirm against the callers.
 */
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
|
|
|
|
/*
 * Sync the timer state for @vcpu.
 * NOTE(review): presumably called on guest exit, before the matching vgic
 * sync and from a non-preemptible section -- confirm against the callers.
 */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
|
|
|
|
/* Per-vcpu timer teardown hook. */
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
|
2014-07-04 22:54:14 +08:00
|
|
|
|
|
|
|
/*
 * Accessor returning a 64-bit value for the timer register selected by
 * @regid on @vcpu.
 * NOTE(review): presumably backs the userspace one-reg interface -- confirm.
 */
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid);
|
|
|
|
/*
 * Accessor storing @value into the timer register selected by @regid on
 * @vcpu.
 * NOTE(review): presumably returns 0 on success or a negative value on
 * failure -- confirm against the implementation.
 */
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value);
|
|
|
|
|
arm/arm64: KVM: Fix migration race in the arch timer
When a VCPU is no longer running, we currently check to see if it has a
timer scheduled in the future, and if it does, we schedule a host
hrtimer to notify us in case the timer expires while the VCPU is still
not running. When the hrtimer fires, we mask the guest's timer and
inject the timer IRQ (still relying on the guest unmasking the timer when
it receives the IRQ).
This is all good and fine, but when migrating a VM (checkpoint/restore)
this introduces a race. It is unlikely, but possible, for the following
sequence of events to happen:
1. Userspace stops the VM
2. Hrtimer for VCPU is scheduled
3. Userspace checkpoints the VGIC state (no pending timer interrupts)
4. The hrtimer fires, schedules work in a workqueue
5. Workqueue function runs, masks the timer and injects timer interrupt
6. Userspace checkpoints the timer state (timer masked)
At restore time, you end up with a masked timer without any timer
interrupts and your guest halts never receiving timer interrupts.
Fix this by only kicking the VCPU in the workqueue function, and sample
the expired state of the timer when entering the guest again and inject
the interrupt and mask the timer only then.
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2015-03-14 01:02:55 +08:00
|
|
|
/*
 * Report whether the timer for @vcpu should fire.
 * NOTE(review): presumably samples the expired state of the timer without
 * modifying it -- confirm against the implementation.
 */
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
|
arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block
We currently schedule a soft timer every time we exit the guest if the
timer did not expire while running the guest. This is really not
necessary, because the only work we do in the timer work function is to
kick the vcpu.
Kicking the vcpu does two things:
(1) If the vcpu thread is on a waitqueue, make it runnable and remove it
from the waitqueue.
(2) If the vcpu is running on a different physical CPU from the one
doing the kick, it sends a reschedule IPI.
The second case cannot happen, because the soft timer is only ever
scheduled when the vcpu is not running. The first case is only relevant
when the vcpu thread is on a waitqueue, which is only the case when the
vcpu thread has called kvm_vcpu_block().
Therefore, we only need to make sure a timer is scheduled for
kvm_vcpu_block(), which we do by encapsulating all calls to
kvm_vcpu_block() with kvm_timer_{un}schedule calls.
Additionally, we only schedule a soft timer if the timer is enabled and
unmasked, since it is useless otherwise.
Note that theoretically userspace can use the SET_ONE_REG interface to
change registers that should cause the timer to fire, even if the vcpu
is blocked without a scheduled timer, but this case was not supported
before this patch and we leave it for future work for now.
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2015-08-26 01:48:21 +08:00
|
|
|
/*
 * Schedule the background (soft) timer for @vcpu.
 * NOTE(review): presumably called right before the vcpu blocks, and only
 * arms a timer when the guest timer is enabled and unmasked -- confirm.
 */
void kvm_timer_schedule(struct kvm_vcpu *vcpu);
|
|
|
|
/*
 * Counterpart of kvm_timer_schedule() for @vcpu.
 * NOTE(review): presumably called right after the vcpu stops blocking to
 * cancel the background timer -- confirm.
 */
void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
|
arm/arm64: KVM: Fix migration race in the arch timer
When a VCPU is no longer running, we currently check to see if it has a
timer scheduled in the future, and if it does, we schedule a host
hrtimer to notify us in case the timer expires while the VCPU is still
not running. When the hrtimer fires, we mask the guest's timer and
inject the timer IRQ (still relying on the guest unmasking the timer when
it receives the IRQ).
This is all good and fine, but when migrating a VM (checkpoint/restore)
this introduces a race. It is unlikely, but possible, for the following
sequence of events to happen:
1. Userspace stops the VM
2. Hrtimer for VCPU is scheduled
3. Userspace checkpoints the VGIC state (no pending timer interrupts)
4. The hrtimer fires, schedules work in a workqueue
5. Workqueue function runs, masks the timer and injects timer interrupt
6. Userspace checkpoints the timer state (timer masked)
At restore time, you end up with a masked timer without any timer
interrupts and your guest halts never receiving timer interrupts.
Fix this by only kicking the VCPU in the workqueue function, and sample
the expired state of the timer when entering the guest again and inject
the interrupt and mask the timer only then.
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2015-03-14 01:02:55 +08:00
|
|
|
|
2013-01-24 02:21:58 +08:00
|
|
|
#endif
|