KVM: PPC: Book3S HV: Simplify locking around stolen time calculations

Currently the calculations of stolen time for PPC Book3S HV guests
uses fields in both the vcpu struct and the kvmppc_vcore struct.  The
fields in the kvmppc_vcore struct are protected by the
vcpu->arch.tbacct_lock of the vcpu that has taken responsibility for
running the virtual core.  This works correctly but confuses lockdep,
because it sees that the code takes the tbacct_lock for a vcpu in
kvmppc_remove_runnable() and then takes another vcpu's tbacct_lock in
vcore_stolen_time(), and it thinks there is a possibility of deadlock,
causing it to print reports like this:

=============================================
[ INFO: possible recursive locking detected ]
3.18.0-rc7-kvm-00016-g8db4bc6 #89 Not tainted
---------------------------------------------
qemu-system-ppc/6188 is trying to acquire lock:
 (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb1fe8>] .vcore_stolen_time+0x48/0xd0 [kvm_hv]

but task is already holding lock:
 (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&vcpu->arch.tbacct_lock)->rlock);
  lock(&(&vcpu->arch.tbacct_lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by qemu-system-ppc/6188:
 #0:  (&vcpu->mutex){+.+.+.}, at: [<d00000000eb93f98>] .vcpu_load+0x28/0xe0 [kvm]
 #1:  (&(&vcore->lock)->rlock){+.+...}, at: [<d00000000ecb41b0>] .kvmppc_vcpu_run_hv+0x530/0x1530 [kvm_hv]
 #2:  (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]

stack backtrace:
CPU: 40 PID: 6188 Comm: qemu-system-ppc Not tainted 3.18.0-rc7-kvm-00016-g8db4bc6 #89
Call Trace:
[c000000b2754f3f0] [c000000000b31b6c] .dump_stack+0x88/0xb4 (unreliable)
[c000000b2754f470] [c0000000000faeb8] .__lock_acquire+0x1878/0x2190
[c000000b2754f600] [c0000000000fbf0c] .lock_acquire+0xcc/0x1a0
[c000000b2754f6d0] [c000000000b2954c] ._raw_spin_lock_irq+0x4c/0x70
[c000000b2754f760] [d00000000ecb1fe8] .vcore_stolen_time+0x48/0xd0 [kvm_hv]
[c000000b2754f7f0] [d00000000ecb25b4] .kvmppc_remove_runnable.part.3+0x44/0xd0 [kvm_hv]
[c000000b2754f880] [d00000000ecb43ec] .kvmppc_vcpu_run_hv+0x76c/0x1530 [kvm_hv]
[c000000b2754f9f0] [d00000000eb9f46c] .kvmppc_vcpu_run+0x2c/0x40 [kvm]
[c000000b2754fa60] [d00000000eb9c9a4] .kvm_arch_vcpu_ioctl_run+0x54/0x160 [kvm]
[c000000b2754faf0] [d00000000eb94538] .kvm_vcpu_ioctl+0x498/0x760 [kvm]
[c000000b2754fcb0] [c000000000267eb4] .do_vfs_ioctl+0x444/0x770
[c000000b2754fd90] [c0000000002682a4] .SyS_ioctl+0xc4/0xe0
[c000000b2754fe30] [c0000000000092e4] syscall_exit+0x0/0x98

In order to make the locking easier to analyse, we change the code to
use a spinlock in the kvmppc_vcore struct to protect the stolen_tb and
preempt_tb fields.  This lock needs to be an irq-safe lock since it is
used in the kvmppc_core_vcpu_load_hv() and kvmppc_core_vcpu_put_hv()
functions, which are called with the scheduler rq lock held, which is
an irq-safe lock.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
This commit is contained in:
Paul Mackerras 2014-12-04 16:43:28 +11:00 committed by Alexander Graf
parent a0499cf746
commit 2711e248a3
2 changed files with 32 additions and 29 deletions

View File

@ -297,6 +297,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads; struct list_head runnable_threads;
spinlock_t lock; spinlock_t lock;
wait_queue_head_t wq; wait_queue_head_t wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb; u64 stolen_tb;
u64 preempt_tb; u64 preempt_tb;
struct kvm_vcpu *runner; struct kvm_vcpu *runner;

View File

@ -135,11 +135,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* stolen. * stolen.
* *
* Updates to busy_stolen are protected by arch.tbacct_lock; * Updates to busy_stolen are protected by arch.tbacct_lock;
* updates to vc->stolen_tb are protected by the arch.tbacct_lock * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
* of the vcpu that has taken responsibility for running the vcore * lock. The stolen times are measured in units of timebase ticks.
* (i.e. vc->runner). The stolen times are measured in units of * (Note that the != TB_NIL checks below are purely defensive;
* timebase ticks. (Note that the != TB_NIL checks below are * they should never fail.)
* purely defensive; they should never fail.)
*/ */
static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@ -147,12 +146,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
struct kvmppc_vcore *vc = vcpu->arch.vcore; struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); /*
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && * We can test vc->runner without taking the vcore lock,
vc->preempt_tb != TB_NIL) { * because only this task ever sets vc->runner to this
vc->stolen_tb += mftb() - vc->preempt_tb; * vcpu, and once it is set to this vcpu, only this task
vc->preempt_tb = TB_NIL; * ever sets it to NULL.
*/
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
spin_lock_irqsave(&vc->stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL) {
vc->stolen_tb += mftb() - vc->preempt_tb;
vc->preempt_tb = TB_NIL;
}
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
} }
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) { vcpu->arch.busy_preempt != TB_NIL) {
vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@ -166,9 +174,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
struct kvmppc_vcore *vc = vcpu->arch.vcore; struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) spin_lock_irqsave(&vc->stoltb_lock, flags);
vc->preempt_tb = mftb(); vc->preempt_tb = mftb();
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb(); vcpu->arch.busy_preempt = mftb();
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{ {
u64 p; u64 p;
unsigned long flags;
/* spin_lock_irqsave(&vc->stoltb_lock, flags);
* If we are the task running the vcore, then since we hold p = vc->stolen_tb;
* the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
* can't be updated, so we don't need the tbacct_lock.
* If the vcore is inactive, it can't become active (since we
* hold the vcore lock), so the vcpu load/put functions won't
* update stolen_tb/preempt_tb, and we don't need tbacct_lock.
*/
if (vc->vcore_state != VCORE_INACTIVE && if (vc->vcore_state != VCORE_INACTIVE &&
vc->runner->arch.run_task != current) { vc->preempt_tb != TB_NIL)
spin_lock_irq(&vc->runner->arch.tbacct_lock); p += now - vc->preempt_tb;
p = vc->stolen_tb; spin_unlock_irqrestore(&vc->stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL)
p += now - vc->preempt_tb;
spin_unlock_irq(&vc->runner->arch.tbacct_lock);
} else {
p = vc->stolen_tb;
}
return p; return p;
} }
@ -1359,6 +1359,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads); INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock); spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
init_waitqueue_head(&vcore->wq); init_waitqueue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL; vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr; vcore->lpcr = kvm->arch.lpcr;
@ -1696,6 +1697,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->n_woken = 0; vc->n_woken = 0;
vc->nap_count = 0; vc->nap_count = 0;
vc->entry_exit_count = 0; vc->entry_exit_count = 0;
vc->preempt_tb = TB_NIL;
vc->vcore_state = VCORE_STARTING; vc->vcore_state = VCORE_STARTING;
vc->in_guest = 0; vc->in_guest = 0;
vc->napping_threads = 0; vc->napping_threads = 0;