Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

Paul Mackerras writes:

    The highlights are:

    * Reduced latency for interrupts from PCI pass-through devices, from
      Suresh Warrier and me.
    * Halt-polling implementation from Suraj Jitindar Singh.
    * 64-bit VCPU statistics, also from Suraj.
    * Various other minor fixes and improvements.
This commit is contained in:
Paolo Bonzini 2016-09-13 15:01:29 +02:00
commit ad53e35ae5
36 changed files with 1367 additions and 504 deletions

View File

@ -183,15 +183,15 @@ struct kvm_vcpu_arch {
};
struct kvm_vm_stat {
u32 remote_tlb_flush;
ulong remote_tlb_flush;
};
struct kvm_vcpu_stat {
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u32 hvc_exit_stat;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;

View File

@ -290,15 +290,15 @@ struct kvm_vcpu_arch {
#endif
struct kvm_vm_stat {
u32 remote_tlb_flush;
ulong remote_tlb_flush;
};
struct kvm_vcpu_stat {
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u32 hvc_exit_stat;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;

View File

@ -110,32 +110,32 @@
extern atomic_t kvm_mips_instance;
struct kvm_vm_stat {
u32 remote_tlb_flush;
ulong remote_tlb_flush;
};
struct kvm_vcpu_stat {
u32 wait_exits;
u32 cache_exits;
u32 signal_exits;
u32 int_exits;
u32 cop_unusable_exits;
u32 tlbmod_exits;
u32 tlbmiss_ld_exits;
u32 tlbmiss_st_exits;
u32 addrerr_st_exits;
u32 addrerr_ld_exits;
u32 syscall_exits;
u32 resvd_inst_exits;
u32 break_inst_exits;
u32 trap_inst_exits;
u32 msa_fpe_exits;
u32 fpe_exits;
u32 msa_disabled_exits;
u32 flush_dcache_exits;
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u64 wait_exits;
u64 cache_exits;
u64 signal_exits;
u64 int_exits;
u64 cop_unusable_exits;
u64 tlbmod_exits;
u64 tlbmiss_ld_exits;
u64 tlbmiss_st_exits;
u64 addrerr_st_exits;
u64 addrerr_ld_exits;
u64 syscall_exits;
u64 resvd_inst_exits;
u64 break_inst_exits;
u64 trap_inst_exits;
u64 msa_fpe_exits;
u64 fpe_exits;
u64 msa_disabled_exits;
u64 flush_dcache_exits;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
u64 halt_wakeup;
};
struct kvm_arch_memory_slot {

View File

@ -244,6 +244,43 @@ static inline int segment_shift(int ssize)
return SID_SHIFT_1T;
}
/*
* This array is indexed by the LP field of the HPTE second dword.
* Since this field may contain some RPN bits, some entries are
* replicated so that we get the same value irrespective of RPN.
* The top 4 bits are the page size index (MMU_PAGE_*) for the
* actual page size, the bottom 4 bits are the base page size.
*/
extern u8 hpte_page_sizes[1 << LP_BITS];
/*
 * Decode the page size of an HPTE from its two doublewords.
 *
 * @h:            first doubleword; only HPTE_V_LARGE is examined here
 * @l:            second doubleword; the LP field is extracted from it
 * @is_base_size: true to return the base page size, false for the actual one
 *
 * Returns the page size in bytes, or 0 when the hpte_page_sizes[] entry for
 * the LP value is zero.
 */
static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
					     bool is_base_size)
{
	unsigned int lp_field, sizes, psize;

	/* Without the large-page bit the answer is always 1 << 12 bytes. */
	if (!(h & HPTE_V_LARGE))
		return 1ul << 12;

	/* The LP field is LP_BITS wide and starts at bit LP_SHIFT. */
	lp_field = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
	sizes = hpte_page_sizes[lp_field];
	if (sizes == 0)
		return 0;

	/* High nibble: actual page size index; low nibble: base page size. */
	psize = is_base_size ? (sizes & 0xf) : ((sizes >> 4) & 0xf);

	return 1ul << mmu_psize_defs[psize].shift;
}
/*
 * Actual page size, in bytes, of the HPTE described by doublewords @h and @l
 * (thin wrapper around __hpte_page_size() with is_base_size == false).
 */
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
	const bool want_base_size = false;

	return __hpte_page_size(h, l, want_base_size);
}
/*
 * Base page size, in bytes, of the HPTE described by doublewords @h and @l
 * (thin wrapper around __hpte_page_size() with is_base_size == true).
 */
static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
{
	const bool want_base_size = true;

	return __hpte_page_size(h, l, want_base_size);
}
/*
* The current system page and segment sizes
*/

View File

@ -21,7 +21,7 @@
#ifndef __ASM_PPC64_HMI_H__
#define __ASM_PPC64_HMI_H__
#ifdef CONFIG_PPC_BOOK3S_64
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define CORE_TB_RESYNC_REQ_BIT 63
#define MAX_SUBCORE_PER_CORE 4

View File

@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
#endif
#endif /* __powerpc64__ */
/*
* Simple Cache inhibited accessors
* Unlike the DEF_MMIO_* macros, these don't include any h/w memory
* barriers, callers need to manage memory barriers on their own.
* These can only be used in hypervisor real mode.
*/
/*
 * Load a 32-bit word from @addr using the lwzcix instruction and return it.
 * The asm emits only the load itself (no barrier instruction); the "memory"
 * clobber merely stops the compiler from reordering memory accesses around it.
 */
static inline u32 _lwzcix(unsigned long addr)
{
u32 ret;
/* RA operand is the literal 0, so the effective address is just @addr. */
__asm__ __volatile__("lwzcix %0,0, %1"
: "=r" (ret) : "r" (addr) : "memory");
return ret;
}
/*
 * Store the single byte @val to @addr using the stbcix instruction.
 * No barrier instruction is emitted; only a compiler-level "memory" clobber.
 */
static inline void _stbcix(u64 addr, u8 val)
{
/* Operand %0 is the value, %1 the address; RA is the literal 0. */
__asm__ __volatile__("stbcix %0,0,%1"
: : "r" (val), "r" (addr) : "memory");
}
/*
 * Store the 32-bit word @val to @addr using the stwcix instruction.
 * No barrier instruction is emitted; only a compiler-level "memory" clobber.
 */
static inline void _stwcix(u64 addr, u32 val)
{
/* Operand %0 is the value, %1 the address; RA is the literal 0. */
__asm__ __volatile__("stwcix %0,0,%1"
: : "r" (val), "r" (addr) : "memory");
}
/*
* Low level IO stream instructions are defined out of line for now
*/

View File

@ -105,6 +105,15 @@
#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
/* book3s_hv */
/*
* Special trap used to indicate to host that this is a
* passthrough interrupt that could not be handled
* completely in the guest.
*/
#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555
#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
#define BOOK3S_IRQPRIO_DATA_SEGMENT 1
#define BOOK3S_IRQPRIO_INST_SEGMENT 2
@ -136,6 +145,7 @@
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
#define RESUME_FLAG_ARCH1 (1<<2)
#define RESUME_FLAG_ARCH2 (1<<3)
#define RESUME_GUEST 0
#define RESUME_GUEST_NV RESUME_FLAG_NV

View File

@ -69,6 +69,42 @@ struct hpte_cache {
int pagesize;
};
/*
* Struct for a virtual core.
* Note: entry_exit_map combines a bitmap of threads that have entered
* in the bottom 8 bits and a bitmap of threads that have exited in the
* next 8 bits. This is so that we can atomically set the entry bit
* iff the exit map is 0 without taking a lock.
*/
struct kvmppc_vcore {
/* incremented/decremented together with updates to runnable_threads[] */
int n_runnable;
int num_threads;
/* entered-threads bitmap in bits 0-7, exited-threads bitmap in bits 8-15 */
int entry_exit_map;
int napping_threads;
int first_vcpuid;
u16 pcpu;
u16 last_cpu;
/* holds one of the VCORE_* state values */
u8 vcore_state;
u8 in_guest;
struct kvmppc_vcore *master_vcore;
/* runnable vcpus; a vcpu is stored in the slot given by its arch.ptid */
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
/* wait queue used by kvmppc_vcore_blocked() when the vcore sleeps */
struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
struct kvm *kvm;
u64 tb_offset; /* guest timebase - host timebase */
ulong lpcr;
u32 arch_compat;
ulong pcr;
ulong dpdes; /* doorbell state (POWER8) */
ulong conferring_threads;
/* current halt-poll interval in ns; polling is skipped while this is 0 */
unsigned int halt_poll_ns;
};
struct kvmppc_vcpu_book3s {
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct {
@ -191,6 +227,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
struct kvm_vcpu *vcpu);
extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
struct kvmppc_book3s_shadow_vcpu *svcpu);
extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{

View File

@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__
#include <asm/book3s/64/mmu-hash.h>
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
hpte[0] = cpu_to_be64(hpte_v);
}
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
int i, shift;
unsigned int mask;
/* start from 1 ignoring MMU_PAGE_4K */
for (i = 1; i < MMU_PAGE_COUNT; i++) {
/* invalid penc */
if (mmu_psize_defs[psize].penc[i] == -1)
continue;
/*
* encoding bits per actual page size
* PTE LP actual page size
* rrrr rrrz >=8KB
* rrrr rrzz >=16KB
* rrrr rzzz >=32KB
* rrrr zzzz >=64KB
* .......
*/
shift = mmu_psize_defs[i].shift - LP_SHIFT;
if (shift > LP_BITS)
shift = LP_BITS;
mask = (1 << shift) - 1;
if ((lp & mask) == mmu_psize_defs[psize].penc[i])
return i;
}
return -1;
}
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
unsigned int penc;
unsigned long rb = 0, va_low, sllp;
unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
if (v & HPTE_V_LARGE) {
for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
/* valid entries have a shift value */
if (!mmu_psize_defs[b_psize].shift)
continue;
a_psize = __hpte_actual_psize(lp, b_psize);
if (a_psize != -1)
break;
}
i = hpte_page_sizes[lp];
b_psize = i & 0xf;
a_psize = i >> 4;
}
/*
* Ignore the top 14 bits of va
* v have top two bits covering segment size, hence move
@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}
static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
bool is_base_size)
{
int size, a_psize;
/* Look at the 8 bit LP value */
unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
/* only handle 4k, 64k and 16M pages for now */
if (!(h & HPTE_V_LARGE))
return 1ul << 12;
else {
for (size = 0; size < MMU_PAGE_COUNT; size++) {
/* valid entries have a shift value */
if (!mmu_psize_defs[size].shift)
continue;
a_psize = __hpte_actual_psize(lp, size);
if (a_psize != -1) {
if (is_base_size)
return 1ul << mmu_psize_defs[size].shift;
return 1ul << mmu_psize_defs[a_psize].shift;
}
}
}
return 0;
}
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
return __hpte_page_size(h, l, 0);
}
static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
{
return __hpte_page_size(h, l, 1);
}
static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;

View File

@ -43,6 +43,8 @@
#include <asm/cputhreads.h>
#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu;
struct kvm_vm_stat {
u32 remote_tlb_flush;
ulong remote_tlb_flush;
};
struct kvm_vcpu_stat {
u32 sum_exits;
u32 mmio_exits;
u32 signal_exits;
u32 light_exits;
u64 sum_exits;
u64 mmio_exits;
u64 signal_exits;
u64 light_exits;
/* Account for special types of light exits: */
u32 itlb_real_miss_exits;
u32 itlb_virt_miss_exits;
u32 dtlb_real_miss_exits;
u32 dtlb_virt_miss_exits;
u32 syscall_exits;
u32 isi_exits;
u32 dsi_exits;
u32 emulated_inst_exits;
u32 dec_exits;
u32 ext_intr_exits;
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u32 dbell_exits;
u32 gdbell_exits;
u32 ld;
u32 st;
u64 itlb_real_miss_exits;
u64 itlb_virt_miss_exits;
u64 dtlb_real_miss_exits;
u64 dtlb_virt_miss_exits;
u64 syscall_exits;
u64 isi_exits;
u64 dsi_exits;
u64 emulated_inst_exits;
u64 dec_exits;
u64 ext_intr_exits;
u64 halt_poll_success_ns;
u64 halt_poll_fail_ns;
u64 halt_wait_ns;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_successful_wait;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 dbell_exits;
u64 gdbell_exits;
u64 ld;
u64 st;
#ifdef CONFIG_PPC_BOOK3S
u32 pf_storage;
u32 pf_instruc;
u32 sp_storage;
u32 sp_instruc;
u32 queue_intr;
u32 ld_slow;
u32 st_slow;
u64 pf_storage;
u64 pf_instruc;
u64 sp_storage;
u64 sp_instruc;
u64 queue_intr;
u64 ld_slow;
u64 st_slow;
#endif
u64 pthru_all;
u64 pthru_host;
u64 pthru_bad_aff;
};
enum kvm_exit_types {
@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table {
struct kvmppc_xics;
struct kvmppc_icp;
struct kvmppc_passthru_irqmap;
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@ -267,6 +278,7 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@ -275,41 +287,6 @@ struct kvm_arch {
#endif
};
/*
* Struct for a virtual core.
* Note: entry_exit_map combines a bitmap of threads that have entered
* in the bottom 8 bits and a bitmap of threads that have exited in the
* next 8 bits. This is so that we can atomically set the entry bit
* iff the exit map is 0 without taking a lock.
*/
struct kvmppc_vcore {
int n_runnable;
int num_threads;
int entry_exit_map;
int napping_threads;
int first_vcpuid;
u16 pcpu;
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct kvmppc_vcore *master_vcore;
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
struct kvm *kvm;
u64 tb_offset; /* guest timebase - host timebase */
ulong lpcr;
u32 arch_compat;
ulong pcr;
ulong dpdes; /* doorbell state (POWER8) */
ulong conferring_threads;
};
#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff)
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
@ -329,6 +306,7 @@ struct kvmppc_vcore {
#define VCORE_SLEEPING 3
#define VCORE_RUNNING 4
#define VCORE_EXITING 5
#define VCORE_POLLING 6
/*
* Struct used to manage memory for a virtual processor area
@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator {
u64 tb_max; /* max time */
};
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_irq_map {
u32 r_hwirq;
u32 v_hwirq;
struct irq_desc *desc;
};
#define KVMPPC_PIRQ_MAPPED 1024
struct kvmppc_passthru_irqmap {
int n_mapped;
struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
};
#endif
# ifdef CONFIG_PPC_FSL_BOOK3E
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
@ -668,7 +660,6 @@ struct kvm_vcpu_arch {
long pgfault_index;
unsigned long pgfault_hpte[2];
struct list_head run_list;
struct task_struct *run_task;
struct kvm_run *kvm_run;

View File

@ -287,6 +287,10 @@ struct kvmppc_ops {
long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
unsigned long arg);
int (*hcall_implemented)(unsigned long hcall);
int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
/*
 * Return the passthrough IRQ map of @kvm, or NULL when @kvm is NULL or the
 * kvm_irq_bypass switch is off.
 */
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
				struct kvm *kvm)
{
	if (!kvm || !kvm_irq_bypass)
		return NULL;

	return kvm->arch.pimap;
}
extern void kvmppc_alloc_host_rm_ops(void);
extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xics_ipi_action(void);
extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
struct kvmppc_irq_map *irq_map,
struct kvmppc_passthru_irqmap *pimap);
extern int h_ipi_redirect;
#else
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
struct kvm *kvm)
{ return NULL; }
static inline void kvmppc_alloc_host_rm_ops(void) {};
static inline void kvmppc_free_host_rm_ops(void) {};
static inline void kvmppc_free_pimap(struct kvm *kvm) {};
static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{ return 0; }
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }

View File

@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void)
#define MMU_PAGE_16G 13
#define MMU_PAGE_64G 14
/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
#define MMU_PAGE_COUNT 15
#ifdef CONFIG_PPC_BOOK3S_64

View File

@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
uint64_t offset, uint32_t data);
int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
int64_t opal_register_exception_handler(uint64_t opal_exception,
uint64_t handler_address,

View File

@ -183,11 +183,6 @@ struct paca_struct {
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
/*
* Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
* more details
*/
struct sibling_subcore_state *sibling_subcore_state;
#endif
/* Stuff for accurate time accounting */
@ -202,6 +197,13 @@ struct paca_struct {
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
#endif
struct kvmppc_host_state kvm_hstate;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
* more details
*/
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
};

View File

@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/irq.h>
#include <misc/cxl-base.h>
#include <asm/opal-api.h>
@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
int pnv_cxl_get_irq_count(struct pci_dev *dev);
struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
bool is_pnv_opal_msi(struct irq_chip *chip);
#ifdef CONFIG_CXL_BASE
int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,

View File

@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o

View File

@ -22,6 +22,9 @@ config KVM
select ANON_INODES
select HAVE_KVM_EVENTFD
select SRCU
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
config KVM_BOOK3S_HANDLER
bool

View File

@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o
common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
CFLAGS_emulate.o := -I.
CFLAGS_emulate_loadstore.o := -I.
common-objs-y += powerpc.o emulate.o emulate_loadstore.o
common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
kvm-e500-objs := \
$(common-objs-y) \
emulate.o \
booke.o \
booke_emulate.o \
booke_interrupts.o \
@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
kvm-e500mc-objs := \
$(common-objs-y) \
emulate.o \
booke.o \
booke_emulate.o \
bookehv_interrupts.o \
@ -61,9 +63,6 @@ kvm-pr-y := \
book3s_32_mmu.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
kvm-book3s_64-module-objs := \
$(KVM)/coalesced_mmio.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_rmhandlers.o
endif
@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
@ -88,11 +88,8 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs += \
$(KVM)/kvm_main.o \
$(KVM)/eventfd.o \
powerpc.o \
emulate_loadstore.o \
kvm-book3s_64-module-objs := \
$(common-objs-y) \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
kvm-book3s_32-objs := \
$(common-objs-y) \
emulate.o \
fpu.o \
book3s_paired_singles.o \
book3s.o \

View File

@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "queue_intr", VCPU_STAT(queue_intr) },
{ "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) },
{ "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) },
{ "halt_wait_ns", VCPU_STAT(halt_wait_ns) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
{ "halt_successful_wait", VCPU_STAT(halt_successful_wait) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "pf_storage", VCPU_STAT(pf_storage) },
@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "ld_slow", VCPU_STAT(ld_slow) },
{ "st", VCPU_STAT(st) },
{ "st_slow", VCPU_STAT(st_slow) },
{ "pthru_all", VCPU_STAT(pthru_all) },
{ "pthru_host", VCPU_STAT(pthru_host) },
{ "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
{ NULL }
};

View File

@ -53,11 +53,15 @@
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
#include "book3s.h"
@ -70,6 +74,8 @@
/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
/* Used to indicate that a guest passthrough interrupt needs to be handled */
#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
/* Used as a "null" value for timebase values */
#define TB_NIL (~(u64)0)
@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
};
module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
/*
 * Factor by which the vcore halt poll interval is grown, default is to double.
 * The backing variable is unsigned, so it is registered as "uint": the
 * module_param() type must match the variable's type, and a signed parser
 * would accept negative input for a value that is used as an unsigned factor.
 */
static unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, S_IRUGO);
MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");

/*
 * Factor by which the vcore halt poll interval is shrunk, default is to reset
 * (with the default of 0, shrinking sets the interval back to 0).
 * Registered as "uint" to match the unsigned backing variable.
 */
static unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, S_IRUGO);
MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
 * Find the next occupied slot in vc->runnable_threads[] after index *ip.
 *
 * @vc: vcore whose runnable_threads[] array is scanned
 * @ip: in: index to start after (-1 to start from slot 0);
 *      out: index of the slot that was found (left untouched on failure)
 *
 * Returns the vcpu found, or NULL when no later slot is populated.
 */
static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
						     int *ip)
{
	int slot;

	for (slot = *ip + 1; slot < MAX_SMT_THREADS; slot++) {
		/* Slots are written with WRITE_ONCE(); read each one once. */
		struct kvm_vcpu *candidate =
			READ_ONCE(vc->runnable_threads[slot]);

		if (!candidate)
			continue;

		*ip = slot;
		return candidate;
	}

	return NULL;
}
/* Used to traverse the list of runnable threads for a given vcore */
#define for_each_runnable_thread(i, vcpu, vc) \
for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
static bool kvmppc_ipi_thread(int cpu)
{
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
break;
default:
kvmppc_dump_regs(vcpu);
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@ -1493,7 +1543,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
if (vcore == NULL)
return NULL;
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
init_swait_queue_head(&vcore->wq);
@ -1802,7 +1851,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
list_del(&vcpu->arch.run_list);
WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
}
static int kvmppc_grab_hwthread(int cpu)
@ -2209,10 +2258,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
static void prepare_threads(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vnext;
int i;
struct kvm_vcpu *vcpu;
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
else if (vcpu->arch.vpa.update_pending ||
@ -2259,15 +2308,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
int still_running = 0;
int still_running = 0, i;
u64 now;
long ret;
struct kvm_vcpu *vcpu, *vnext;
struct kvm_vcpu *vcpu;
spin_lock(&vc->lock);
now = get_tb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
for_each_runnable_thread(i, vcpu, vc) {
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
@ -2307,8 +2355,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
if (vc->n_runnable > 0 && vc->runner == NULL) {
/* make sure there's a candidate runner awake */
vcpu = list_first_entry(&vc->runnable_threads,
struct kvm_vcpu, arch.run_list);
i = -1;
vcpu = next_runnable_thread(vc, &i);
wake_up(&vcpu->arch.cpu_run);
}
}
@ -2361,7 +2409,7 @@ static inline void kvmppc_set_host_core(int cpu)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vnext;
struct kvm_vcpu *vcpu;
int i;
int srcu_idx;
struct core_info core_info;
@ -2397,8 +2445,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
@ -2477,8 +2524,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
active |= 1 << thr;
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
pvc->pcpu = pcpu + thr;
list_for_each_entry(vcpu, &pvc->runnable_threads,
arch.run_list) {
for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
@ -2604,34 +2650,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
finish_wait(&vcpu->arch.cpu_run, &wait);
}
/*
 * Enlarge the halt-poll interval of @vc.
 *
 * An interval of 0 jumps to a 10us base (provided the grow factor is
 * non-zero); otherwise the interval is multiplied by halt_poll_ns_grow.
 * The result is capped at halt_poll_max_ns.
 */
static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
{
	unsigned int next;

	if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
		next = 10000;	/* 10us base */
	else
		next = vc->halt_poll_ns * halt_poll_ns_grow;

	vc->halt_poll_ns = (next > halt_poll_max_ns) ? halt_poll_max_ns : next;
}
/*
 * Reduce the halt-poll interval of @vc: divide it by halt_poll_ns_shrink,
 * or reset it to 0 when the shrink factor is 0.
 */
static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
{
	vc->halt_poll_ns = halt_poll_ns_shrink ?
			   vc->halt_poll_ns / halt_poll_ns_shrink : 0;
}
/* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
*/
/*
 * Returns 1 as soon as any runnable vcpu of @vc is no longer ceded or has
 * pending exceptions, and 0 when every runnable vcpu is ceded with nothing
 * pending.
 */
static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *v;
	int idx;

	for_each_runnable_thread(idx, v, vc) {
		if (!v->arch.ceded)
			return 1;
		if (v->arch.pending_exceptions)
			return 1;
	}

	return 0;
}
/*
* All the vcpus in this vcore are idle, so wait for a decrementer
* or external interrupt to one of the vcpus. vc->lock is held.
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
u64 block_ns;
DECLARE_SWAITQUEUE(wait);
prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
++vc->runner->stat.halt_attempted_poll;
/*
* Check one last time for pending exceptions and ceded state after
* we put ourselves on the wait queue
*/
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
do_sleep = 0;
break;
vc->vcore_state = VCORE_POLLING;
spin_unlock(&vc->lock);
do {
if (kvmppc_vcore_check_block(vc)) {
do_sleep = 0;
break;
}
cur = ktime_get();
} while (single_task_running() && ktime_before(cur, stop));
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
if (!do_sleep) {
++vc->runner->stat.halt_successful_poll;
goto out;
}
}
if (!do_sleep) {
prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
if (kvmppc_vcore_check_block(vc)) {
finish_swait(&vc->wq, &wait);
return;
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
++vc->runner->stat.halt_successful_poll;
goto out;
}
start_wait = ktime_get();
vc->vcore_state = VCORE_SLEEPING;
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
@ -2640,13 +2744,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
++vc->runner->stat.halt_successful_wait;
cur = ktime_get();
out:
block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
/* Attribute wait time */
if (do_sleep) {
vc->runner->stat.halt_wait_ns +=
ktime_to_ns(cur) - ktime_to_ns(start_wait);
/* Attribute failed poll time */
if (vc->halt_poll_ns)
vc->runner->stat.halt_poll_fail_ns +=
ktime_to_ns(start_wait) -
ktime_to_ns(start_poll);
} else {
/* Attribute successful poll time */
if (vc->halt_poll_ns)
vc->runner->stat.halt_poll_success_ns +=
ktime_to_ns(cur) -
ktime_to_ns(start_poll);
}
/* Adjust poll time */
if (halt_poll_max_ns) {
if (block_ns <= vc->halt_poll_ns)
;
/* We slept and blocked for longer than the max halt time */
else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
shrink_halt_poll_ns(vc);
/* We slept and our poll time is too small */
else if (vc->halt_poll_ns < halt_poll_max_ns &&
block_ns < halt_poll_max_ns)
grow_halt_poll_ns(vc);
} else
vc->halt_poll_ns = 0;
trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
}
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int n_ceded;
int n_ceded, i;
struct kvmppc_vcore *vc;
struct kvm_vcpu *v, *vn;
struct kvm_vcpu *v;
trace_kvmppc_run_vcpu_enter(vcpu);
@ -2666,7 +2809,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
++vc->n_runnable;
/*
@ -2706,8 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
continue;
}
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
arch.run_list) {
for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
@ -2720,7 +2862,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
break;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
for_each_runnable_thread(i, v, vc) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
else
@ -2759,8 +2901,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
/* Wake up some vcpu to run the core */
v = list_first_entry(&vc->runnable_threads,
struct kvm_vcpu, arch.run_list);
i = -1;
v = next_runnable_thread(vc, &i);
wake_up(&v->arch.cpu_run);
}
@ -2818,7 +2960,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = kvmppc_book3s_hv_page_fault(run, vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
}
} else if (r == RESUME_PASSTHROUGH)
r = kvmppc_xics_rm_complete(vcpu, 0);
} while (is_kvmppc_resume_guest(r));
out:
@ -3247,6 +3390,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmppc_free_vcores(kvm);
kvmppc_free_hpt(kvm);
kvmppc_free_pimap(kvm);
}
/* We don't need to emulate any privileged instructions or dcbz */
@ -3282,6 +3427,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
return 0;
}
#ifdef CONFIG_KVM_XICS
/*
 * Release the VM's passthrough IRQ map.  kfree() accepts NULL, so this
 * is safe to call even if no map was ever allocated.
 */
void kvmppc_free_pimap(struct kvm *kvm)
{
	struct kvmppc_passthru_irqmap *pimap = kvm->arch.pimap;

	kfree(pimap);
}
/*
 * Allocate a zero-filled passthrough IRQ map.
 * Returns the new map, or NULL if the allocation fails.
 */
static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
{
	struct kvmppc_passthru_irqmap *pimap;

	pimap = kzalloc(sizeof(*pimap), GFP_KERNEL);
	return pimap;
}
/*
 * Record a mapping from host interrupt @host_irq to guest interrupt
 * @guest_gsi in the VM's passthrough IRQ map, allocating the map on
 * first use.
 *
 * Returns:
 *   0        on success
 *   1        if IRQ bypass is disabled (kvm_irq_bypass == 0)
 *   -EIO     if @host_irq has no irq_desc
 *   -ENOMEM  if the map could not be allocated
 *   -ENOENT  if the interrupt's chip is not a PNV OPAL MSI chip
 *   -EINVAL  if @guest_gsi is already mapped to a hardware IRQ
 *   -EAGAIN  if the map is full
 */
static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
{
	struct kvmppc_passthru_irqmap *pimap;
	struct kvmppc_irq_map *entry;
	struct irq_chip *chip;
	struct irq_desc *desc;
	int slot;
	int rc = 0;

	if (!kvm_irq_bypass)
		return 1;

	desc = irq_to_desc(host_irq);
	if (!desc)
		return -EIO;

	mutex_lock(&kvm->lock);

	pimap = kvm->arch.pimap;
	if (!pimap) {
		/* First mapping for this VM: create the table. */
		pimap = kvmppc_alloc_pimap();
		if (!pimap) {
			rc = -ENOMEM;
			goto out_unlock;
		}
		kvm->arch.pimap = pimap;
	}

	/*
	 * The real-mode EOI path performs an OPAL call followed by a
	 * write to XIRR, so only interrupts that are EOIed that way
	 * can be supported for now.
	 */
	chip = irq_data_get_irq_chip(&desc->irq_data);
	if (!chip || !is_pnv_opal_msi(chip)) {
		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
			host_irq, guest_gsi);
		rc = -ENOENT;
		goto out_unlock;
	}

	/*
	 * Search for an existing entry for this guest IRQ.  An entry
	 * that still has a hardware IRQ attached is an error; one that
	 * has been invalidated is recycled.  If nothing matches, slot
	 * ends up equal to n_mapped, i.e. the next free position.
	 */
	for (slot = 0; slot < pimap->n_mapped; slot++) {
		if (guest_gsi != pimap->mapped[slot].v_hwirq)
			continue;
		if (pimap->mapped[slot].r_hwirq) {
			rc = -EINVAL;
			goto out_unlock;
		}
		break;
	}

	if (slot == KVMPPC_PIRQ_MAPPED) {
		rc = -EAGAIN;	/* table is full */
		goto out_unlock;
	}

	entry = &pimap->mapped[slot];
	entry->v_hwirq = guest_gsi;
	entry->desc = desc;

	/*
	 * The two stores above must be visible before r_hwirq is set,
	 * because the KVM real-mode handler matches on r_hwirq without
	 * taking a lock.
	 */
	smp_wmb();
	entry->r_hwirq = desc->irq_data.hwirq;

	if (slot == pimap->n_mapped)
		pimap->n_mapped++;

	kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);

out_unlock:
	mutex_unlock(&kvm->lock);
	return rc;
}
/*
 * Invalidate the passthrough mapping for guest interrupt @guest_gsi.
 *
 * Returns:
 *   0        on success, if IRQ bypass is disabled, or if the VM has
 *            no passthrough map
 *   -EIO     if @host_irq has no irq_desc
 *   -ENODEV  if no entry exists for @guest_gsi
 */
static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
{
	struct kvmppc_passthru_irqmap *pimap;
	int slot;
	int rc = 0;

	if (!kvm_irq_bypass)
		return 0;

	if (!irq_to_desc(host_irq))
		return -EIO;

	mutex_lock(&kvm->lock);

	pimap = kvm->arch.pimap;
	if (!pimap)
		goto out_unlock;

	/* Locate the entry carrying this guest IRQ number. */
	slot = 0;
	while (slot < pimap->n_mapped &&
	       guest_gsi != pimap->mapped[slot].v_hwirq)
		slot++;

	if (slot == pimap->n_mapped) {
		rc = -ENODEV;
		goto out_unlock;
	}

	kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[slot].r_hwirq);

	/* A zero r_hwirq marks the entry as invalid. */
	pimap->mapped[slot].r_hwirq = 0;

	/*
	 * The map itself is kept even if no valid entries remain; it
	 * is released only when the VM is destroyed.
	 */
out_unlock:
	mutex_unlock(&kvm->lock);
	return rc;
}
/*
 * IRQ bypass "add producer" hook for HV KVM: remember the producer in
 * the irqfd and install a passthrough mapping from the producer's IRQ
 * to the irqfd's GSI.  Returns the result of kvmppc_set_passthru_irq().
 */
static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
					     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd;
	int rc;

	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
	irqfd->producer = prod;

	rc = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
	if (rc != 0)
		pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
			prod->irq, irqfd->gsi, rc);

	return rc;
}
/*
 * IRQ bypass "delete producer" hook for HV KVM: detach the producer
 * from the irqfd and invalidate the corresponding passthrough mapping.
 * A failure to clear the mapping is only logged.
 */
static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
					      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd;
	int rc;

	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
	irqfd->producer = NULL;

	/*
	 * Once the producer is gone we fall back to the default
	 * external interrupt handling: KVM real mode hands the
	 * interrupt back to the host.
	 */
	rc = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
	if (rc != 0)
		pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
			prod->irq, irqfd->gsi, rc);
}
#endif
static long kvm_arch_vm_ioctl_hv(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -3400,6 +3723,10 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
#ifdef CONFIG_KVM_XICS
.irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
.irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
#endif
};
static int kvm_init_subcore_bitmap(void)

View File

@ -25,6 +25,7 @@
#include <asm/xics.h>
#include <asm/dbell.h>
#include <asm/cputhreads.h>
#include <asm/io.h>
#define KVM_CMA_CHUNK_ORDER 18
@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap)
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
#ifdef CONFIG_KVM_XICS
/*
 * Find the passthrough map entry whose hardware IRQ number (r_hwirq)
 * equals @xisr.  Returns the entry, or NULL if none of the n_mapped
 * entries matches.
 *
 * The table is read without holding a lock.  The original authors
 * rely on two properties for this to be safe: the number of entries
 * never shrinks, and an entry's v_hwirq is never changed once set.
 * The writer orders its stores so that r_hwirq is published last; the
 * read barrier below is the reader-side half of that ordering, so a
 * caller that sees a matching r_hwirq also sees valid GSI and irq_desc
 * fields.
 */
static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
					 u32 xisr)
{
	int idx;

	for (idx = 0; idx < pimap->n_mapped; idx++) {
		struct kvmppc_irq_map *entry = &pimap->mapped[idx];

		if (xisr != entry->r_hwirq)
			continue;

		/*
		 * Make the caller's subsequent loads from this entry
		 * happen after the r_hwirq load above.
		 */
		smp_rmb();
		return entry;
	}

	return NULL;
}
/*
 * Handle an external interrupt that is not an IPI.
 *
 * If the VM has a passthrough adapter and @xisr belongs to it, try to
 * deliver the interrupt straight to the target VCPU's virtual ICP
 * (chosen by affinity - the xive value in the ICS).
 *
 * Returns 1 when the interrupt is not ours to handle (no running vcpu,
 * no passthrough map, or no mapping for @xisr); the host ICP driver
 * then picks up the XIRR copy saved in the PACA earlier.  Otherwise
 * returns whatever kvmppc_deliver_irq_passthru() returns.
 */
static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
{
	struct kvm_vcpu *vcpu = local_paca->kvm_hstate.kvm_vcpu;
	struct kvmppc_passthru_irqmap *pimap;
	struct kvmppc_irq_map *irq_map;

	if (vcpu == NULL)
		return 1;

	pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
	if (pimap == NULL)
		return 1;

	irq_map = get_irqmap(pimap, xisr);
	if (irq_map == NULL)
		return 1;

	/* This interrupt is handled here, so hide it from generic code. */
	local_paca->kvm_hstate.saved_xirr = 0;

	return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
}
#else
/*
 * Stub used when CONFIG_KVM_XICS is not set: there is no passthrough
 * support, so always return 1, the code kvmppc_read_intr() uses for
 * "interrupt must be handled by the host".
 */
static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
{
return 1;
}
#endif
/*
 * Determine what sort of external interrupt is pending (if any).
 *
 * Returns:
 *   0  if no interrupt is pending
 *   1  if an interrupt is pending that needs to be handled by the host
 *      (also returned when a host IPI is flagged or when no XICS
 *      physical address is recorded in the PACA)
 *   2  if a passthrough interrupt needs completion in the host
 *  -1  if there was a guest wakeup IPI (which has now been cleared)
 *  -2  if a PCI passthrough external interrupt was handled here
 *
 * The values 2 and -2 originate from kvmppc_check_passthru().
 */
long kvmppc_read_intr(void)
{
unsigned long xics_phys;
u32 h_xirr;
__be32 xirr;
u32 xisr;
u8 host_ipi;
/* See if a host IPI is pending; if so the host must handle it. */
host_ipi = local_paca->kvm_hstate.host_ipi;
if (host_ipi)
return 1;
/* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys;
if (unlikely(!xics_phys))
return 1;
/*
 * Save XIRR for later. Since we get control in reverse endian
 * on LE systems, save it byte reversed and fetch it back in
 * host endian. Note that xirr is the value read from the
 * XIRR register, while h_xirr is the host endian version.
 */
xirr = _lwzcix(xics_phys + XICS_XIRR);
h_xirr = be32_to_cpu(xirr);
local_paca->kvm_hstate.saved_xirr = h_xirr;
/* The interrupt source number is the low 24 bits of XIRR. */
xisr = h_xirr & 0xffffff;
/*
 * Ensure that the store/load complete, to guarantee that all side
 * effects of loading from XIRR have completed.
 */
smp_mb();
/* Nothing pending in the ICP */
if (!xisr)
return 0;
/*
 * We found something in the ICP...
 *
 * If it is an IPI, clear the MFRR and EOI it.
 */
if (xisr == XICS_IPI) {
_stbcix(xics_phys + XICS_MFRR, 0xff);
_stwcix(xics_phys + XICS_XIRR, xirr);
/*
 * Need to ensure side effects of above stores
 * complete before proceeding.
 */
smp_mb();
/*
 * We need to re-check host IPI now in case it got set in the
 * meantime. If it's clear, we bounce the interrupt to the
 * guest.
 */
host_ipi = local_paca->kvm_hstate.host_ipi;
if (unlikely(host_ipi != 0)) {
/*
 * We raced with the host, so we need to resend
 * that IPI.
 */
_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
/* Let side effects complete */
smp_mb();
return 1;
}
/* OK, it's an IPI for us; nothing is left for the host to see. */
local_paca->kvm_hstate.saved_xirr = 0;
return -1;
}
/* Not an IPI: see whether it belongs to a passthrough device. */
return kvmppc_check_passthru(xisr, xirr);
}

View File

@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/kernel_stat.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@ -18,7 +19,10 @@
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/pgtable.h>
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
#include "book3s_xics.h"
@ -26,9 +30,12 @@
int h_ipi_redirect = 1;
EXPORT_SYMBOL(h_ipi_redirect);
int kvm_irq_bypass = 1;
EXPORT_SYMBOL(kvm_irq_bypass);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
/* -- ICS routines -- */
static void ics_rm_check_resend(struct kvmppc_xics *xics,
@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
icp->rm_action |= XICS_RM_NOTIFY_EOI;
icp->rm_eoied_irq = irq;
}
if (state->host_irq) {
++vcpu->stat.pthru_all;
if (state->intr_cpu != -1) {
int pcpu = raw_smp_processor_id();
pcpu = cpu_first_thread_sibling(pcpu);
++vcpu->stat.pthru_host;
if (state->intr_cpu != pcpu) {
++vcpu->stat.pthru_bad_aff;
xics_opal_rm_set_server(state->host_irq, pcpu);
}
state->intr_cpu = -1;
}
}
bail:
return check_too_hard(xics, icp);
}
/* Most recent non-zero return code from pnv_opal_pci_msi_eoi(), set by icp_eoi(). */
unsigned long eoi_rc;
/*
 * EOI a passed-through interrupt: first ask the interrupt chip @c to
 * EOI hardware interrupt @hwirq, then write @xirr back to this CPU's
 * XIRR register.  A failing chip-level EOI is only recorded in eoi_rc;
 * the XIRR write is performed regardless.
 */
static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
{
unsigned long xics_phys;
int64_t rc;
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
eoi_rc = rc;
/* Order the chip-level EOI before the XIRR store below. */
iosync();
/* EOI it */
xics_phys = local_paca->kvm_hstate.xics_phys;
_stwcix(xics_phys + XICS_XIRR, xirr);
}
/*
 * Retarget hardware interrupt @hw_irq at @server_cpu, at
 * DEFAULT_PRIORITY, using the real-mode OPAL call.  The server value
 * handed to OPAL is the hard SMP processor id shifted left by two.
 * Returns the value returned by opal_rm_set_xive().
 */
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
{
	unsigned int xive_server;

	xive_server = get_hard_smp_processor_id(server_cpu) << 2;

	return opal_rm_set_xive(hw_irq, xive_server, DEFAULT_PRIORITY);
}
/*
 * Increment this CPU's instance of a per-CPU unsigned int.
 *
 * Intended for real mode: if the per-CPU instance lives in the
 * vmalloc region, its address is translated with vmalloc_to_phys()
 * before the increment.
 *
 * ToDo: Make this work for any integral type
 */
static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
{
	int this_cpu = smp_processor_id();
	unsigned int *counter = per_cpu_ptr(addr, this_cpu);
	unsigned long ea = (unsigned long)counter;

	if (REGION_ID(ea) == VMALLOC_REGION_ID) {
		unsigned long pa = vmalloc_to_phys(counter);

		counter = (unsigned int *)pa;
	}

	++*counter;
}
/*
 * Account for an interrupt that is being handled in real mode by
 * bumping the per-descriptor and per-CPU interrupt counters.
 *
 * We don't try to update the flags in the irq_desc 'istate' field
 * here, as would happen in the normal IRQ handling path, for several
 * reasons:
 * - state flags represent internal IRQ state and are not expected to be
 *   updated outside the IRQ subsystem
 * - more importantly, these are useful for edge triggered interrupts,
 *   IRQ probing, etc., but we are only handling MSI/MSI-X interrupts
 *   here and these states shouldn't apply to us.
 *
 * However, we do update irq_stats - we somewhat duplicate the code in
 * kstat_incr_irqs_this_cpu() for this, since that function is defined
 * in irq/internal.h, which we don't want to include here.
 * The only difference is that desc->kstat_irqs is an allocated per-CPU
 * variable and could have been vmalloc'ed, so we can't directly
 * call __this_cpu_inc() on it. The kstat structure is a static
 * per-CPU variable and it should be accessible by real-mode KVM.
 */
static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
{
this_cpu_inc_rm(desc->kstat_irqs);
__this_cpu_inc(kstat.irqs_sum);
}
/*
 * Deliver a passed-through hardware interrupt to the guest.
 *
 * Updates the host interrupt statistics for the descriptor, delivers
 * the guest interrupt number from @irq_map to the vcpu's ICP, then
 * EOIs the hardware interrupt using @xirr.
 *
 * Returns 2 if check_too_hard() reports H_TOO_HARD (completion is
 * needed in the host), otherwise -2 (fully handled here).
 */
long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
				 u32 xirr,
				 struct kvmppc_irq_map *irq_map,
				 struct kvmppc_passthru_irqmap *pimap)
{
	u32 guest_irq = irq_map->v_hwirq;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;

	kvmppc_rm_handle_irq_desc(irq_map->desc);
	icp_rm_deliver_irq(xics, icp, guest_irq);

	/* EOI the interrupt */
	icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);

	return (check_too_hard(xics, icp) == H_TOO_HARD) ? 2 : -2;
}
/* --- Non-real mode XICS-related built-in routines --- */
/**

View File

@ -221,6 +221,13 @@ kvmppc_primary_no_guest:
li r3, 0 /* Don't wake on privileged (OS) doorbell */
b kvm_do_nap
/*
* kvm_novcpu_wakeup
* Entered from kvm_start_guest if kvm_hstate.napping is set
* to NAPPING_NOVCPU
* r2 = kernel TOC
* r13 = paca
*/
kvm_novcpu_wakeup:
ld r1, HSTATE_HOST_R1(r13)
ld r5, HSTATE_KVM_VCORE(r13)
@ -230,6 +237,13 @@ kvm_novcpu_wakeup:
/* check the wake reason */
bl kvmppc_check_wake_reason
/*
* Restore volatile registers since we could have called
* a C routine in kvmppc_check_wake_reason.
* r5 = VCORE
*/
ld r5, HSTATE_KVM_VCORE(r13)
/* see if any other thread is already exiting */
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
@ -322,6 +336,11 @@ kvm_start_guest:
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
/*
* kvmppc_check_wake_reason could invoke a C routine, but we
* have no volatile registers to restore when we return.
*/
cmpdi r3, 0
bge kvm_no_guest
@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
cmpwi r3, 512 /* 1 microsecond */
blt hdec_soon
deliver_guest_interrupt:
ld r6, VCPU_CTR(r4)
ld r7, VCPU_XER(r4)
@ -895,7 +915,6 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
deliver_guest_interrupt:
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* set, we know the host wants us out so let's do it now
*/
bl kvmppc_read_intr
/*
* Restore the active volatile registers after returning from
* a C function.
*/
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_EXTERNAL
/*
* kvmppc_read_intr return codes:
*
* Exit to host (r3 > 0)
* 1 An interrupt is pending that needs to be handled by the host
* Exit guest and return to host by branching to guest_exit_cont
*
* 2 Passthrough that needs completion in the host
* Exit guest and return to host by branching to guest_exit_cont
* However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
* to indicate to the host to complete handling the interrupt
*
* Before returning to guest, we check if any CPU is heading out
* to the host and if so, we head out also. If no CPUs are heading
* out, check return values <= 0.
*
* Return to guest (r3 <= 0)
* 0 No external interrupt is pending
* -1 A guest wakeup IPI (which has now been cleared)
* In either case, we return to guest to deliver any pending
* guest interrupts.
*
* -2 A PCI passthrough external interrupt was handled
* (interrupt was delivered directly to guest)
* Return to guest to deliver any pending guest interrupts.
*/
cmpdi r3, 1
ble 1f
/* Return code = 2 */
li r12, BOOK3S_INTERRUPT_HV_RM_HARD
stw r12, VCPU_TRAP(r9)
b guest_exit_cont
1: /* Return code <= 1 */
cmpdi r3, 0
bgt guest_exit_cont
/* Check if any CPU is heading out to the host, if so head out too */
/* Return code <= 0 */
4: ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
@ -2217,6 +2280,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
/*
* Restore volatile registers since we could have called a
* C routine in kvmppc_check_wake_reason
* r4 = VCPU
* r3 tells us whether we need to return to host or not
* WARNING: it gets checked further down:
* should not modify r3 until this check is done.
*/
ld r4, HSTATE_KVM_VCPU(r13)
/* clear our bit in vcore->napping_threads */
34: ld r5,HSTATE_KVM_VCORE(r13)
lbz r7,HSTATE_PTID(r13)
@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
li r0,0
stb r0,HSTATE_NAPPING(r13)
/* See if the wake reason means we need to exit */
/* See if the wake reason saved in r3 means we need to exit */
stw r12, VCPU_TRAP(r4)
mr r9, r4
cmpdi r3, 0
@ -2297,10 +2370,14 @@ machine_check_realmode:
* 0 if nothing needs to be done
* 1 if something happened that needs to be handled by the host
* -1 if there was a guest wakeup (IPI or msgsnd)
* -2 if we handled a PCI passthrough interrupt (returned by
* kvmppc_read_intr only)
*
* Also sets r12 to the interrupt vector for any interrupt that needs
* to be handled now by the host (0x500 for external interrupt), or zero.
* Modifies r0, r6, r7, r8.
* Modifies all volatile registers (since it may call a C function).
* This routine calls kvmppc_read_intr, a C function, if an external
* interrupt is pending.
*/
kvmppc_check_wake_reason:
mfspr r6, SPRN_SRR1
@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE
rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
cmpwi r6, 8 /* was it an external interrupt? */
li r12, BOOK3S_INTERRUPT_EXTERNAL
beq kvmppc_read_intr /* if so, see what it was */
beq 7f /* if so, see what it was */
li r3, 0
li r12, 0
cmpwi r6, 6 /* was it the decrementer? */
@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
blr
/*
* Determine what sort of external interrupt is pending (if any).
* Returns:
* 0 if no interrupt is pending
* 1 if an interrupt is pending that needs to be handled by the host
* -1 if there was a guest wakeup IPI (which has now been cleared)
* Modifies r0, r6, r7, r8, returns value in r3.
*/
kvmppc_read_intr:
/* see if a host IPI is pending */
li r3, 1
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne 1f
/* external interrupt - create a stack frame so we can call C */
7: mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -PPC_MIN_STKFRM(r1)
bl kvmppc_read_intr
nop
li r12, BOOK3S_INTERRUPT_EXTERNAL
cmpdi r3, 1
ble 1f
/* Now read the interrupt from the ICP */
ld r6, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
cmpdi r6, 0
beq- 1f
lwzcix r0, r6, r7
/*
* Save XIRR for later. Since we get in in reverse endian on LE
* systems, save it byte reversed and fetch it back in host endian.
* Return code of 2 means PCI passthrough interrupt, but
* we need to return back to host to complete handling the
* interrupt. Trap reason is expected in r12 by guest
* exit code.
*/
li r3, HSTATE_SAVED_XIRR
STWX_BE r0, r3, r13
#ifdef __LITTLE_ENDIAN__
lwz r3, HSTATE_SAVED_XIRR(r13)
#else
mr r3, r0
#endif
rlwinm. r3, r3, 0, 0xffffff
sync
beq 1f /* if nothing pending in the ICP */
/* We found something in the ICP...
*
* If it's not an IPI, stash it in the PACA and return to
* the host, we don't (yet) handle directing real external
* interrupts directly to the guest
*/
cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
bne 42f
/* It's an IPI, clear the MFRR and EOI it */
li r3, 0xff
li r8, XICS_MFRR
stbcix r3, r6, r8 /* clear the IPI */
stwcix r0, r6, r7 /* EOI it */
sync
/* We need to re-check host IPI now in case it got set in the
* meantime. If it's clear, we bounce the interrupt to the
* guest
*/
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne- 43f
/* OK, it's an IPI for us */
li r12, 0
li r3, -1
1: blr
42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in
* the PACA earlier, it will be picked up by the host ICP driver
*/
li r3, 1
b 1b
43: /* We raced with the host, we need to resend that IPI, bummer */
li r0, IPI_PRIORITY
stbcix r0, r6, r8 /* set the IPI */
sync
li r3, 1
b 1b
li r12, BOOK3S_INTERRUPT_HV_RM_HARD
1:
ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
addi r1, r1, PPC_MIN_STKFRM
mtlr r0
blr
/*
* Save away FP, VMX and VSX registers.

View File

@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
return 0;
}
/* Record which CPU this arrived on for passed-through interrupts */
if (state->host_irq)
state->intr_cpu = raw_smp_processor_id();
/* Attempt delivery */
icp_deliver_irq(xics, NULL, irq);
@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
return H_SUCCESS;
}
static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
/* -- Initialisation code etc. -- */
static void xics_debugfs_irqmap(struct seq_file *m,
struct kvmppc_passthru_irqmap *pimap)
{
int i;
if (!pimap)
return;
seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
pimap->n_mapped);
for (i = 0; i < pimap->n_mapped; i++) {
seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
}
}
static int xics_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xics *xics = m->private;
@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
t_check_resend = 0;
t_reject = 0;
xics_debugfs_irqmap(m, kvm->arch.pimap);
seq_printf(m, "=========\nICP state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
{
struct kvmppc_xics *xics = kvm->arch.xics;
if (!xics)
return -ENODEV;
return ics_deliver_irq(xics, irq, level);
}
@ -1418,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
return pin;
}
void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
unsigned long host_irq)
{
struct kvmppc_xics *xics = kvm->arch.xics;
struct kvmppc_ics *ics;
u16 idx;
ics = kvmppc_xics_find_ics(xics, irq, &idx);
if (!ics)
return;
ics->irq_state[idx].host_irq = host_irq;
ics->irq_state[idx].intr_cpu = -1;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
unsigned long host_irq)
{
struct kvmppc_xics *xics = kvm->arch.xics;
struct kvmppc_ics *ics;
u16 idx;
ics = kvmppc_xics_find_ics(xics, irq, &idx);
if (!ics)
return;
ics->irq_state[idx].host_irq = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);

View File

@ -42,6 +42,8 @@ struct ics_irq_state {
u8 lsi; /* level-sensitive interrupt */
u8 asserted; /* Only for LSI */
u8 exists;
int intr_cpu;
u32 host_irq;
};
/* Atomic ICP state, updated with a single compare & swap */

View File

@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
char *virt;
struct page **pages;
struct tlbe_priv *privs[2] = {};
u64 *g2h_bitmap = NULL;
u64 *g2h_bitmap;
size_t array_len;
u32 sets;
int num_pages, ret, i;
@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
cfg->array / PAGE_SIZE;
pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
if (ret < 0)
goto err_pages;
goto free_pages;
if (ret != num_pages) {
num_pages = ret;
ret = -EFAULT;
goto err_put_page;
goto put_pages;
}
virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
if (!virt) {
ret = -ENOMEM;
goto err_put_page;
goto put_pages;
}
privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
GFP_KERNEL);
privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
GFP_KERNEL);
if (!privs[0] || !privs[1]) {
privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
if (!privs[0]) {
ret = -ENOMEM;
goto err_privs;
goto put_pages;
}
g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
GFP_KERNEL);
privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
if (!privs[1]) {
ret = -ENOMEM;
goto free_privs_first;
}
g2h_bitmap = kcalloc(params.tlb_sizes[1],
sizeof(*g2h_bitmap),
GFP_KERNEL);
if (!g2h_bitmap) {
ret = -ENOMEM;
goto err_privs;
goto free_privs_second;
}
free_gtlb(vcpu_e500);
@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err_privs:
kfree(privs[0]);
free_privs_second:
kfree(privs[1]);
err_put_page:
free_privs_first:
kfree(privs[0]);
put_pages:
for (i = 0; i < num_pages; i++)
put_page(pages[i]);
err_pages:
free_pages:
kfree(pages);
return ret;
}
@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
if (e500_mmu_host_init(vcpu_e500))
goto err;
goto free_vcpu;
vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
vcpu_e500->gtlb_params[1].sets = 1;
vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
KVM_E500_TLB1_SIZE,
sizeof(*vcpu_e500->gtlb_arch),
GFP_KERNEL);
if (!vcpu_e500->gtlb_arch)
return -ENOMEM;
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
vcpu_e500->gtlb_params[0].entries,
vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[0])
goto err;
goto free_vcpu;
vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
vcpu_e500->gtlb_params[1].entries,
vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[1])
goto err;
goto free_vcpu;
vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
vcpu_e500->gtlb_params[1].entries,
vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
sizeof(*vcpu_e500->g2h_tlb1_map),
GFP_KERNEL);
if (!vcpu_e500->g2h_tlb1_map)
goto err;
goto free_vcpu;
vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err:
free_vcpu:
free_gtlb(vcpu_e500);
return -1;
}

View File

@ -27,6 +27,8 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/irqbypass.h>
#include <linux/kvm_irqfd.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@ -739,6 +741,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
/*
* irq_bypass_add_producer and irq_bypass_del_producer are only
* useful if the architecture supports PCI passthrough.
* irq_bypass_stop and irq_bypass_start are not needed and so
* kvm_ops are not defined for them.
*/
bool kvm_arch_has_irq_bypass(void)
{
return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
(kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
}
/*
 * Forward an IRQ bypass "add producer" request to the hook of the
 * kvm_ops backing the irqfd's VM.  Returns the hook's result, or 0
 * when that backend provides no hook.
 */
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd;
	struct kvm *kvm;

	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
	kvm = irqfd->kvm;

	if (!kvm->arch.kvm_ops->irq_bypass_add_producer)
		return 0;

	return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
}
/*
 * Forward an IRQ bypass "delete producer" request to the hook of the
 * kvm_ops backing the irqfd's VM.  Does nothing when that backend
 * provides no hook.
 */
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd;
	struct kvm *kvm;

	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
	kvm = irqfd->kvm;

	if (!kvm->arch.kvm_ops->irq_bypass_del_producer)
		return;

	kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@ -1167,6 +1205,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
/*
 * Report whether the VM has an in-kernel interrupt controller: true if
 * an MPIC (with CONFIG_KVM_MPIC) or a XICS (with CONFIG_KVM_XICS)
 * instance is attached to @kvm, false otherwise.
 */
bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	bool initialized = false;

#ifdef CONFIG_KVM_MPIC
	if (kvm->arch.mpic)
		initialized = true;
#endif
#ifdef CONFIG_KVM_XICS
	if (kvm->arch.xics)
		initialized = true;
#endif

	return initialized;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{

View File

@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
__entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
);
/*
 * Tracepoint kvmppc_vcore_wakeup: records a time in nanoseconds, whether
 * the vcore slept (@do_sleep non-zero prints "wait") or only polled
 * ("poll"), and the tgid of the current task.
 *
 * NOTE(review): ns is a __u64 but is printed with %lld; %llu would match
 * the type.  Left unchanged here because it alters the emitted text.
 */
TRACE_EVENT(kvmppc_vcore_wakeup,
TP_PROTO(int do_sleep, __u64 ns),
TP_ARGS(do_sleep, ns),
TP_STRUCT__entry(
__field(__u64, ns)
__field(int, waited)
__field(pid_t, tgid)
),
TP_fast_assign(
__entry->ns = ns;
__entry->waited = do_sleep;
__entry->tgid = current->tgid;
),
TP_printk("%s time %lld ns, tgid=%d",
__entry->waited ? "wait" : "poll",
__entry->ns, __entry->tgid)
);
TRACE_EVENT(kvmppc_run_vcpu_enter,
TP_PROTO(struct kvm_vcpu *vcpu),

View File

@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
}
#endif
/*
 * Given the LP field @lp of an HPTE and a candidate base page size
 * index @psize, find the actual page size index whose encoding for
 * that base size matches the low bits of @lp.
 *
 * Returns the matching actual page size index, or -1 if no encoding
 * defined for @psize matches.
 */
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
int i, shift;
unsigned int mask;
/* start from 1 ignoring MMU_PAGE_4K */
for (i = 1; i < MMU_PAGE_COUNT; i++) {
/* invalid penc: this base/actual combination has no encoding */
if (mmu_psize_defs[psize].penc[i] == -1)
continue;
/*
 * encoding bits per actual page size
 * PTE LP actual page size
 * rrrr rrrz >=8KB
 * rrrr rrzz >=16KB
 * rrrr rzzz >=32KB
 * rrrr zzzz >=64KB
 * .......
 */
/* Number of LP bits that carry the encoding, capped at LP_BITS. */
shift = mmu_psize_defs[i].shift - LP_SHIFT;
if (shift > LP_BITS)
shift = LP_BITS;
mask = (1 << shift) - 1;
if ((lp & mask) == mmu_psize_defs[psize].penc[i])
return i;
}
return -1;
}
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
} else {
for (size = 0; size < MMU_PAGE_COUNT; size++) {
/* valid entries have a shift value */
if (!mmu_psize_defs[size].shift)
continue;
a_size = __hpte_actual_psize(lp, size);
if (a_size != -1)
break;
}
size = hpte_page_sizes[lp] & 0xf;
a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
if (cpu_has_feature(CPU_FTR_ARCH_300))

View File

@ -93,6 +93,9 @@ static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
u8 hpte_page_sizes[1 << LP_BITS];
EXPORT_SYMBOL_GPL(hpte_page_sizes);
struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
#endif /* CONFIG_HUGETLB_PAGE */
}
/*
 * Fill in the hpte_page_sizes[] array.
 * We go through the mmu_psize_defs[] array looking for all the
 * supported base/actual page size combinations.  Each combination
 * has a unique pagesize encoding (penc) value in the low bits of
 * the LP field of the HPTE.  For actual page sizes less than 1MB,
 * some of the upper LP bits are used for RPN bits, meaning that
 * we need to fill in several entries in hpte_page_sizes[].
 *
 * In diagrammatic form, with r = RPN bits and z = page size bits:
 *        PTE LP     actual page size
 *    rrrr rrrz		>=8KB
 *    rrrr rrzz		>=16KB
 *    rrrr rzzz		>=32KB
 *    rrrr zzzz		>=64KB
 *    ...
 *
 * The zzzz bits are implementation-specific but are chosen so that
 * no encoding for a larger page size uses the same value in its
 * low-order N bits as the encoding for the 2^(12+N) byte page size
 * (if it exists).
 *
 * Each entry written is (actual page size index << 4) | base page
 * size index.
 */
static void init_hpte_page_sizes(void)
{
	long int ap, bp;
	long int shift, penc;

	for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
		if (!mmu_psize_defs[bp].shift)
			continue;	/* not a supported page size */
		for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
			penc = mmu_psize_defs[bp].penc[ap];
			/*
			 * Skip combinations with no encoding, and actual
			 * page sizes that are themselves unsupported (zero
			 * shift).  The zero-shift test is explicit because,
			 * if the shift field has an unsigned type, the
			 * subtraction below wraps to a large positive value
			 * rather than going negative; the "shift <= 0" test
			 * would then not catch it and the replication loop
			 * would shift by an out-of-range count.
			 */
			if (penc == -1 || !mmu_psize_defs[ap].shift)
				continue;
			shift = mmu_psize_defs[ap].shift - LP_SHIFT;
			if (shift <= 0)
				continue;	/* should never happen */
			/*
			 * For page sizes less than 1MB, this loop
			 * replicates the entry for all possible values
			 * of the rrrr bits.
			 */
			while (penc < (1 << LP_BITS)) {
				hpte_page_sizes[penc] = (ap << 4) | bp;
				penc += 1 << shift;
			}
		}
	}
}
static void __init htab_init_page_sizes(void)
{
init_hpte_page_sizes();
if (!debug_pagealloc_enabled()) {
/*
* Pick a size for the linear mapping. Currently, we only

View File

@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
/* Same OPAL_SET_XIVE token as opal_set_xive, but generated via OPAL_CALL_REAL */
OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE);
OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);

View File

@ -2710,15 +2710,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
}
#ifdef CONFIG_PCI_MSI
static void pnv_ioda2_msi_eoi(struct irq_data *d)
int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
{
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
struct irq_chip *chip = irq_data_get_irq_chip(d);
struct pnv_phb *phb = container_of(chip, struct pnv_phb,
ioda.irq_chip);
int64_t rc;
rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
return opal_pci_msi_eoi(phb->opal_id, hw_irq);
}
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
int64_t rc;
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
struct irq_chip *chip = irq_data_get_irq_chip(d);
rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
WARN_ON_ONCE(rc);
icp_native_eoi(d);
@ -2748,6 +2754,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
irq_set_chip(virq, &phb->ioda.irq_chip);
}
/*
* Returns true iff chip is something that we could call
* pnv_opal_pci_msi_eoi for.
*/
bool is_pnv_opal_msi(struct irq_chip *chip)
{
return chip->irq_eoi == pnv_ioda2_msi_eoi;
}
EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg)

View File

@ -245,72 +245,72 @@ struct sie_page {
} __packed;
struct kvm_vcpu_stat {
u32 exit_userspace;
u32 exit_null;
u32 exit_external_request;
u32 exit_external_interrupt;
u32 exit_stop_request;
u32 exit_validity;
u32 exit_instruction;
u32 exit_pei;
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
u32 instruction_stctg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 exit_operation_exception;
u32 deliver_external_call;
u32 deliver_emergency_signal;
u32 deliver_service_signal;
u32 deliver_virtio_interrupt;
u32 deliver_stop_signal;
u32 deliver_prefix_signal;
u32 deliver_restart_signal;
u32 deliver_program_int;
u32 deliver_io_int;
u32 exit_wait_state;
u32 instruction_pfmf;
u32 instruction_stidp;
u32 instruction_spx;
u32 instruction_stpx;
u32 instruction_stap;
u32 instruction_storage_key;
u32 instruction_ipte_interlock;
u32 instruction_stsch;
u32 instruction_chsc;
u32 instruction_stsi;
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_sie;
u32 instruction_essa;
u32 instruction_sthyi;
u32 instruction_sigp_sense;
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
u32 instruction_sigp_cond_emergency;
u32 instruction_sigp_start;
u32 instruction_sigp_stop;
u32 instruction_sigp_stop_store_status;
u32 instruction_sigp_store_status;
u32 instruction_sigp_store_adtl_status;
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
u32 instruction_sigp_init_cpu_reset;
u32 instruction_sigp_cpu_reset;
u32 instruction_sigp_unknown;
u32 diagnose_10;
u32 diagnose_44;
u32 diagnose_9c;
u32 diagnose_258;
u32 diagnose_308;
u32 diagnose_500;
u64 exit_userspace;
u64 exit_null;
u64 exit_external_request;
u64 exit_external_interrupt;
u64 exit_stop_request;
u64 exit_validity;
u64 exit_instruction;
u64 exit_pei;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 instruction_lctl;
u64 instruction_lctlg;
u64 instruction_stctl;
u64 instruction_stctg;
u64 exit_program_interruption;
u64 exit_instr_and_program;
u64 exit_operation_exception;
u64 deliver_external_call;
u64 deliver_emergency_signal;
u64 deliver_service_signal;
u64 deliver_virtio_interrupt;
u64 deliver_stop_signal;
u64 deliver_prefix_signal;
u64 deliver_restart_signal;
u64 deliver_program_int;
u64 deliver_io_int;
u64 exit_wait_state;
u64 instruction_pfmf;
u64 instruction_stidp;
u64 instruction_spx;
u64 instruction_stpx;
u64 instruction_stap;
u64 instruction_storage_key;
u64 instruction_ipte_interlock;
u64 instruction_stsch;
u64 instruction_chsc;
u64 instruction_stsi;
u64 instruction_stfl;
u64 instruction_tprot;
u64 instruction_sie;
u64 instruction_essa;
u64 instruction_sthyi;
u64 instruction_sigp_sense;
u64 instruction_sigp_sense_running;
u64 instruction_sigp_external_call;
u64 instruction_sigp_emergency;
u64 instruction_sigp_cond_emergency;
u64 instruction_sigp_start;
u64 instruction_sigp_stop;
u64 instruction_sigp_stop_store_status;
u64 instruction_sigp_store_status;
u64 instruction_sigp_store_adtl_status;
u64 instruction_sigp_arch;
u64 instruction_sigp_prefix;
u64 instruction_sigp_restart;
u64 instruction_sigp_init_cpu_reset;
u64 instruction_sigp_cpu_reset;
u64 instruction_sigp_unknown;
u64 diagnose_10;
u64 diagnose_44;
u64 diagnose_9c;
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
};
#define PGM_OPERATION 0x01
@ -577,7 +577,7 @@ struct kvm_vcpu_arch {
};
struct kvm_vm_stat {
u32 remote_tlb_flush;
ulong remote_tlb_flush;
};
struct kvm_arch_memory_slot {

View File

@ -792,45 +792,45 @@ struct kvm_arch {
};
struct kvm_vm_stat {
u32 mmu_shadow_zapped;
u32 mmu_pte_write;
u32 mmu_pte_updated;
u32 mmu_pde_zapped;
u32 mmu_flooded;
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
u32 remote_tlb_flush;
u32 lpages;
ulong mmu_shadow_zapped;
ulong mmu_pte_write;
ulong mmu_pte_updated;
ulong mmu_pde_zapped;
ulong mmu_flooded;
ulong mmu_recycled;
ulong mmu_cache_miss;
ulong mmu_unsync;
ulong remote_tlb_flush;
ulong lpages;
};
struct kvm_vcpu_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u64 pf_fixed;
u64 pf_guest;
u64 tlb_flush;
u64 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 nmi_window_exits;
u32 halt_exits;
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_poll_invalid;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
u32 fpu_reload;
u32 insn_emulation;
u32 insn_emulation_fail;
u32 hypercalls;
u32 irq_injections;
u32 nmi_injections;
u64 exits;
u64 io_exits;
u64 mmio_exits;
u64 signal_exits;
u64 irq_window_exits;
u64 nmi_window_exits;
u64 halt_exits;
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
u64 efer_reload;
u64 fpu_reload;
u64 insn_emulation;
u64 insn_emulation_fail;
u64 hypercalls;
u64 irq_injections;
u64 nmi_injections;
};
struct x86_instruction_info;

View File

@ -3619,7 +3619,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
{
struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
*val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
*val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
return 0;
}
@ -3649,7 +3649,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
*val = 0;
kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
*val += *(u32 *)((void *)vcpu + stat_data->offset);
*val += *(u64 *)((void *)vcpu + stat_data->offset);
return 0;
}