Fixes for PPC KVM:
- Close a hole which could possibly lead to the host timebase getting out of sync.
- Three fixes relating to PTEs and TLB entries for radix guests.
- Fix a bug which could lead to an interrupt never getting delivered to the guest, if it is pending for a guest vCPU when the vCPU gets offlined.

-----BEGIN PGP SIGNATURE-----

iQFGBAABCgAwFiEEv0VLfXa2m9eKuaRpnZrqdyxjcZ8FAlsGTWMSHHBhdWx1c0Bv
emxhYnMub3JnAAoJEJ2a6ncsY3GfPKQH/3dopz+qjpZqvhgvqfC0wkLlGLcTxmKK
+y77M5YStFEeytYB52hyrAs4KptM1If5+BfShX4tTzGY5MGS4RMvzY7tLNzLlmFg
S/ghzlFCh4dIz+LTk58FIyFmyn7GrvJRP33FoiAPCCp1AkRL7MlSD5cu3N6fHo6P
GU5lHLLyaGEIkC4KxLQdr4smV3tKNk1k6iz4eMHwDOeLoxcLnz0LbiM7xBr/Txmu
miF68B29hU/peKM/GbtSAh5TpWY6WlcPTBUEiHXghcuYmXqgW43fjGleuL330mN4
HtSONLuapa6VNSJy3UuGBlI1puIEbUrtTPfy0UxKQG3Em7L8UnxO2wk=
=7/7b
-----END PGP SIGNATURE-----

Merge tag 'kvm-ppc-fixes-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

Fixes for PPC KVM:

- Close a hole which could possibly lead to the host timebase getting out of sync.
- Three fixes relating to PTEs and TLB entries for radix guests.
- Fix a bug which could lead to an interrupt never getting delivered to the guest, if it is pending for a guest vCPU when the vCPU gets offlined.
This commit is contained in:
commit
b09efdc250
|
@ -96,6 +96,7 @@ struct kvmppc_vcore {
|
|||
struct kvm_vcpu *runner;
|
||||
struct kvm *kvm;
|
||||
u64 tb_offset; /* guest timebase - host timebase */
|
||||
u64 tb_offset_applied; /* timebase offset currently in force */
|
||||
ulong lpcr;
|
||||
u32 arch_compat;
|
||||
ulong pcr;
|
||||
|
|
|
@ -562,6 +562,7 @@ int main(void)
|
|||
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
|
||||
OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
|
||||
OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
|
||||
OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
|
||||
OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
|
||||
OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
|
||||
OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
|
||||
|
|
|
@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
|||
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
|
||||
asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
|
||||
: : "r" (addr), "r" (kvm->arch.lpid) : "memory");
|
||||
asm volatile("ptesync": : :"memory");
|
||||
asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
|
||||
}
|
||||
|
||||
static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
|
||||
|
@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
|
|||
/* RIC=1 PRS=0 R=1 IS=2 */
|
||||
asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
|
||||
: : "r" (rb), "r" (kvm->arch.lpid) : "memory");
|
||||
asm volatile("ptesync": : :"memory");
|
||||
asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
|
||||
}
|
||||
|
||||
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
|
||||
|
@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
|
||||
gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift);
|
||||
if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
|
||||
|
|
|
@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
|
|||
vc->in_guest = 0;
|
||||
vc->napping_threads = 0;
|
||||
vc->conferring_threads = 0;
|
||||
vc->tb_offset_applied = 0;
|
||||
}
|
||||
|
||||
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
|
||||
|
|
|
@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
|||
22: ld r8,VCORE_TB_OFFSET(r5)
|
||||
cmpdi r8,0
|
||||
beq 37f
|
||||
std r8, VCORE_TB_OFFSET_APPL(r5)
|
||||
mftb r6 /* current host timebase */
|
||||
add r8,r8,r6
|
||||
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
|
||||
|
@ -940,18 +941,6 @@ FTR_SECTION_ELSE
|
|||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
8:
|
||||
|
||||
/*
|
||||
* Set the decrementer to the guest decrementer.
|
||||
*/
|
||||
ld r8,VCPU_DEC_EXPIRES(r4)
|
||||
/* r8 is a host timebase value here, convert to guest TB */
|
||||
ld r5,HSTATE_KVM_VCORE(r13)
|
||||
ld r6,VCORE_TB_OFFSET(r5)
|
||||
add r8,r8,r6
|
||||
mftb r7
|
||||
subf r3,r7,r8
|
||||
mtspr SPRN_DEC,r3
|
||||
|
||||
ld r5, VCPU_SPRG0(r4)
|
||||
ld r6, VCPU_SPRG1(r4)
|
||||
ld r7, VCPU_SPRG2(r4)
|
||||
|
@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
|||
mtspr SPRN_LPCR,r8
|
||||
isync
|
||||
|
||||
/*
|
||||
* Set the decrementer to the guest decrementer.
|
||||
*/
|
||||
ld r8,VCPU_DEC_EXPIRES(r4)
|
||||
/* r8 is a host timebase value here, convert to guest TB */
|
||||
ld r5,HSTATE_KVM_VCORE(r13)
|
||||
ld r6,VCORE_TB_OFFSET_APPL(r5)
|
||||
add r8,r8,r6
|
||||
mftb r7
|
||||
subf r3,r7,r8
|
||||
mtspr SPRN_DEC,r3
|
||||
|
||||
/* Check if HDEC expires soon */
|
||||
mfspr r3, SPRN_HDEC
|
||||
EXTEND_HDEC(r3)
|
||||
|
@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
|||
|
||||
guest_bypass:
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
mr r3, r12
|
||||
|
||||
/* Save DEC */
|
||||
/* Do this before kvmhv_commence_exit so we know TB is guest TB */
|
||||
ld r3, HSTATE_KVM_VCORE(r13)
|
||||
mfspr r5,SPRN_DEC
|
||||
mftb r6
|
||||
/* On P9, if the guest has large decr enabled, don't sign extend */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r4, VCORE_LPCR(r3)
|
||||
andis. r4, r4, LPCR_LD@h
|
||||
bne 16f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r5,r5
|
||||
16: add r5,r5,r6
|
||||
/* r5 is a guest timebase value here, convert to host TB */
|
||||
ld r4,VCORE_TB_OFFSET_APPL(r3)
|
||||
subf r5,r4,r5
|
||||
std r5,VCPU_DEC_EXPIRES(r9)
|
||||
|
||||
/* Increment exit count, poke other threads to exit */
|
||||
mr r3, r12
|
||||
bl kvmhv_commence_exit
|
||||
nop
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
|
@ -1639,23 +1659,6 @@ guest_bypass:
|
|||
mtspr SPRN_PURR,r3
|
||||
mtspr SPRN_SPURR,r4
|
||||
|
||||
/* Save DEC */
|
||||
ld r3, HSTATE_KVM_VCORE(r13)
|
||||
mfspr r5,SPRN_DEC
|
||||
mftb r6
|
||||
/* On P9, if the guest has large decr enabled, don't sign extend */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r4, VCORE_LPCR(r3)
|
||||
andis. r4, r4, LPCR_LD@h
|
||||
bne 16f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r5,r5
|
||||
16: add r5,r5,r6
|
||||
/* r5 is a guest timebase value here, convert to host TB */
|
||||
ld r4,VCORE_TB_OFFSET(r3)
|
||||
subf r5,r4,r5
|
||||
std r5,VCPU_DEC_EXPIRES(r9)
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
b 8f
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
||||
|
@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
|||
cmpwi cr2, r0, 0
|
||||
beq cr2, 4f
|
||||
|
||||
/*
|
||||
* Radix: do eieio; tlbsync; ptesync sequence in case we
|
||||
* interrupted the guest between a tlbie and a ptesync.
|
||||
*/
|
||||
eieio
|
||||
tlbsync
|
||||
ptesync
|
||||
|
||||
/* Radix: Handle the case where the guest used an illegal PID */
|
||||
LOAD_REG_ADDR(r4, mmu_base_pid)
|
||||
lwz r3, VCPU_GUEST_PID(r9)
|
||||
|
@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
|
||||
27:
|
||||
/* Subtract timebase offset from timebase */
|
||||
ld r8,VCORE_TB_OFFSET(r5)
|
||||
ld r8, VCORE_TB_OFFSET_APPL(r5)
|
||||
cmpdi r8,0
|
||||
beq 17f
|
||||
li r0, 0
|
||||
std r0, VCORE_TB_OFFSET_APPL(r5)
|
||||
mftb r6 /* current guest timebase */
|
||||
subf r8,r8,r6
|
||||
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
|
||||
|
@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
|||
add r3, r3, r5
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
ld r6, VCORE_TB_OFFSET(r5)
|
||||
ld r6, VCORE_TB_OFFSET_APPL(r5)
|
||||
subf r3, r6, r3 /* convert to host TB value */
|
||||
std r3, VCPU_DEC_EXPIRES(r4)
|
||||
|
||||
|
@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
|
|||
/* Restore guest decrementer */
|
||||
ld r3, VCPU_DEC_EXPIRES(r4)
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
ld r6, VCORE_TB_OFFSET(r5)
|
||||
ld r6, VCORE_TB_OFFSET_APPL(r5)
|
||||
add r3, r3, r6 /* convert host TB to guest TB value */
|
||||
mftb r7
|
||||
subf r3, r7, r3
|
||||
|
@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
|
|||
*/
|
||||
kvmhv_start_timing:
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
lbz r6, VCORE_IN_GUEST(r5)
|
||||
cmpwi r6, 0
|
||||
beq 5f /* if in guest, need to */
|
||||
ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
|
||||
5: mftb r5
|
||||
subf r5, r6, r5
|
||||
ld r6, VCORE_TB_OFFSET_APPL(r5)
|
||||
mftb r5
|
||||
subf r5, r6, r5 /* subtract current timebase offset */
|
||||
std r3, VCPU_CUR_ACTIVITY(r4)
|
||||
std r5, VCPU_ACTIVITY_START(r4)
|
||||
blr
|
||||
|
@ -3622,15 +3632,12 @@ kvmhv_start_timing:
|
|||
*/
|
||||
kvmhv_accumulate_time:
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
lbz r8, VCORE_IN_GUEST(r5)
|
||||
cmpwi r8, 0
|
||||
beq 4f /* if in guest, need to */
|
||||
ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
|
||||
4: ld r5, VCPU_CUR_ACTIVITY(r4)
|
||||
ld r8, VCORE_TB_OFFSET_APPL(r5)
|
||||
ld r5, VCPU_CUR_ACTIVITY(r4)
|
||||
ld r6, VCPU_ACTIVITY_START(r4)
|
||||
std r3, VCPU_CUR_ACTIVITY(r4)
|
||||
mftb r7
|
||||
subf r7, r8, r7
|
||||
subf r7, r8, r7 /* subtract current timebase offset */
|
||||
std r7, VCPU_ACTIVITY_START(r4)
|
||||
cmpdi r5, 0
|
||||
beqlr
|
||||
|
|
|
@ -11,6 +11,9 @@
|
|||
#define XGLUE(a,b) a##b
|
||||
#define GLUE(a,b) XGLUE(a,b)
|
||||
|
||||
/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
|
||||
#define XICS_DUMMY 1
|
||||
|
||||
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
|
||||
{
|
||||
u8 cppr;
|
||||
|
@ -205,6 +208,10 @@ skip_ipi:
|
|||
goto skip_ipi;
|
||||
}
|
||||
|
||||
/* If it's the dummy interrupt, continue searching */
|
||||
if (hirq == XICS_DUMMY)
|
||||
goto skip_ipi;
|
||||
|
||||
/* If fetching, update queue pointers */
|
||||
if (scan_type == scan_fetch) {
|
||||
q->idx = idx;
|
||||
|
@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
|
|||
__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
|
||||
}
|
||||
|
||||
/*
 * Scan this vCPU's event queues, for every priority from xc->cppr up to
 * KVMPPC_XIVE_Q_COUNT - 1, looking for queued interrupts whose source's
 * act_server no longer matches xc->server_num.
 *
 * Each such entry is overwritten in place with XICS_DUMMY (keeping the
 * entry's top bit), and the source is then EOIed -- after setting PQ to 11
 * for non-LSI sources -- so that, per the comment in the body, the
 * interrupt is resent.
 *
 * The walk uses local copies of the queue index and toggle; q->idx and
 * q->toggle are not written by this function.
 */
static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
|
||||
struct kvmppc_xive_vcpu *xc)
|
||||
{
|
||||
unsigned int prio;
|
||||
|
||||
/* For each priority that is now masked */
|
||||
for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
|
||||
struct xive_q *q = &xc->queues[prio];
|
||||
struct kvmppc_xive_irq_state *state;
|
||||
struct kvmppc_xive_src_block *sb;
|
||||
u32 idx, toggle, entry, irq, hw_num;
|
||||
struct xive_irq_data *xd;
|
||||
__be32 *qpage;
|
||||
u16 src;
|
||||
|
||||
/* Start from the queue's current position, using local copies only */
idx = q->idx;
|
||||
toggle = q->toggle;
|
||||
qpage = READ_ONCE(q->qpage);
|
||||
/* No queue page for this priority: nothing to scan */
if (!qpage)
|
||||
continue;
|
||||
|
||||
/* For each interrupt in the queue */
|
||||
for (;;) {
|
||||
entry = be32_to_cpup(qpage + idx);
|
||||
|
||||
/* No more ? (top bit of the entry equals the current toggle) */
|
||||
if ((entry >> 31) == toggle)
|
||||
break;
|
||||
/* The low 31 bits of the entry hold the interrupt number */
irq = entry & 0x7fffffff;
|
||||
|
||||
/* Skip dummies and IPIs */
|
||||
if (irq == XICS_DUMMY || irq == XICS_IPI)
|
||||
goto next;
|
||||
sb = kvmppc_xive_find_source(xive, irq, &src);
|
||||
/* No source block found for this interrupt number: leave the entry alone */
if (!sb)
|
||||
goto next;
|
||||
state = &sb->irq_state[src];
|
||||
|
||||
/* Has it been rerouted ? If it still targets this server, keep it queued */
|
||||
if (xc->server_num == state->act_server)
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* All right, it *has* been re-routed, kill it from
|
||||
* the queue.
|
||||
*/
|
||||
qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
|
||||
|
||||
/* Find the HW interrupt */
|
||||
kvmppc_xive_select_irq(state, &hw_num, &xd);
|
||||
|
||||
/* If it's not an LSI, set PQ to 11; the EOI will then force a resend */
|
||||
if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
|
||||
GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
|
||||
|
||||
/* EOI the source */
|
||||
GLUE(X_PFX,source_eoi)(hw_num, xd);
|
||||
|
||||
next:
|
||||
/* Advance to the next slot; flip the expected toggle when wrapping to slot 0 */
idx = (idx + 1) & q->msk;
|
||||
if (idx == 0)
|
||||
toggle ^= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
|
||||
{
|
||||
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
||||
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
|
||||
u8 old_cppr;
|
||||
|
||||
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
|
||||
|
@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
|
|||
*/
|
||||
smp_mb();
|
||||
|
||||
/*
|
||||
* We are masking less, we need to look for pending things
|
||||
* to deliver and set VP pending bits accordingly to trigger
|
||||
* a new interrupt otherwise we might miss MFRR changes for
|
||||
* which we have optimized out sending an IPI signal.
|
||||
*/
|
||||
if (cppr > old_cppr)
|
||||
if (cppr > old_cppr) {
|
||||
/*
|
||||
* We are masking less, we need to look for pending things
|
||||
* to deliver and set VP pending bits accordingly to trigger
|
||||
* a new interrupt otherwise we might miss MFRR changes for
|
||||
* which we have optimized out sending an IPI signal.
|
||||
*/
|
||||
GLUE(X_PFX,push_pending_to_hw)(xc);
|
||||
} else {
|
||||
/*
|
||||
* We are masking more, we need to check the queue for any
|
||||
* interrupt that has been routed to another CPU, take
|
||||
* it out (replace it with the dummy) and retrigger it.
|
||||
*
|
||||
* This is necessary since those interrupts may otherwise
|
||||
* never be processed, at least not until this CPU restores
|
||||
* its CPPR.
|
||||
*
|
||||
* This is in theory racy vs. HW adding new interrupts to
|
||||
* the queue. In practice this works because the interesting
|
||||
* cases are when the guest has done a set_xive() to move the
|
||||
* interrupt away, which flushes the xive, followed by the
|
||||
* target CPU doing a H_CPPR. So any new interrupt coming into
|
||||
* the queue must still be routed to us and isn't a source
|
||||
* of concern.
|
||||
*/
|
||||
GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
|
||||
}
|
||||
|
||||
/* Apply new CPPR */
|
||||
xc->hw_cppr = cppr;
|
||||
|
|
Loading…
Reference in New Issue