LoongArch: Add pv ipi support on guest kernel side

Upstream: no

PARAVIRT option and pv ipi is added on guest kernel side, function
pv_ipi_init() is to add ipi sending and ipi receiving hooks. This function
firstly checks whether system runs on VM mode. If kernel runs on VM mode,
it will call function kvm_para_available() to detect current hypervirsor
type. Now only KVM type detection is supported, the paravirt function can
work only if current hypervisor type is KVM, since there is only KVM
supported on LoongArch now.

PV IPI uses virtual IPI sender and virtual IPI receiver function. With
virutal IPI sender, ipi message is stored in DDR memory rather than
emulated HW. IPI multicast is supported, and 128 vcpus can received IPIs
at the same time like X86 KVM method. Hypercall method is used for IPI
sending.

With virtual IPI receiver, HW SW0 is used rather than real IPI HW. Since
VCPU has separate HW SW0 like HW timer, there is no trap in IPI interrupt
acknowledge. And IPI message is stored in DDR, no trap in get IPI message.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
This commit is contained in:
Bibo Mao 2024-03-01 11:42:21 +08:00 committed by Jianping Liu
parent fe60041dc6
commit 05e7e13eb9
8 changed files with 203 additions and 2 deletions

View File

@ -565,6 +565,15 @@ config CPU_HAS_PREFETCH
bool
default y
config PARAVIRT
bool "Enable paravirtualization code"
depends on AS_HAS_LVZ_EXTENSION
help
This changes the kernel so it can modify itself when it is run
under a hypervisor, potentially improving performance significantly
over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger.
config ARCH_SUPPORTS_KEXEC
def_bool y

View File

@ -21,6 +21,7 @@ enum ipi_msg_type {
typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
atomic_t message ____cacheline_aligned_in_smp;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);

View File

@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_LOONGARCH_PARAVIRT_H
#define _ASM_LOONGARCH_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
#include <linux/static_call_types.h>
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
u64 dummy_steal_clock(int cpu);
DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
static inline u64 paravirt_steal_clock(int cpu)
{
return static_call(pv_steal_clock)(cpu);
}
int pv_ipi_init(void);
#else
static inline int pv_ipi_init(void)
{
return 0;
}
#endif // CONFIG_PARAVIRT
#endif

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2024 Loongson Technology Corporation Limited
*/
#ifndef _ASM_API_CLOCK_H
#define _ASM_API_CLOCK_H
#include <asm/paravirt.h>
#endif

View File

@ -49,6 +49,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_SMP) += smp.o

View File

@ -154,5 +154,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
}

View File

@ -0,0 +1,151 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/jump_label.h>
#include <linux/kvm_para.h>
#include <asm/paravirt.h>
#include <linux/static_call.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
static u64 native_steal_clock(int cpu)
{
return 0;
}
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
#ifdef CONFIG_SMP
static void pv_send_ipi_single(int cpu, unsigned int action)
{
unsigned int min, old;
irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
old = atomic_fetch_or(BIT(action), &info->message);
if (old)
return;
min = cpu_logical_map(cpu);
kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min);
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int cpu, i, min = 0, max = 0, old;
__uint128_t bitmap = 0;
irq_cpustat_t *info;
if (cpumask_empty(mask))
return;
action = BIT(action);
for_each_cpu(i, mask) {
info = &per_cpu(irq_stat, i);
old = atomic_fetch_or(action, &info->message);
if (old)
continue;
cpu = cpu_logical_map(i);
if (!bitmap) {
min = max = cpu;
} else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
max = cpu > max ? cpu : max;
} else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
bitmap <<= min - cpu;
min = cpu;
} else {
/*
* Physical cpuid is sorted in ascending order ascend
* for the next mask calculation, send IPI here
* directly and skip the remainding cpus
*/
kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
(unsigned long)bitmap,
(unsigned long)(bitmap >> BITS_PER_LONG), min);
min = max = cpu;
bitmap = 0;
}
__set_bit(cpu - min, (unsigned long *)&bitmap);
}
if (bitmap)
kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
(unsigned long)(bitmap >> BITS_PER_LONG), min);
}
static irqreturn_t loongson_do_swi(int irq, void *dev)
{
irq_cpustat_t *info;
long action;
/* Clear swi interrupt */
clear_csr_estat(1 << INT_SWI0);
info = this_cpu_ptr(&irq_stat);
action = atomic_xchg(&info->message, 0);
if (action & SMP_CALL_FUNCTION) {
generic_smp_call_function_interrupt();
info->ipi_irqs[IPI_CALL_FUNCTION]++;
}
if (action & SMP_RESCHEDULE) {
scheduler_ipi();
info->ipi_irqs[IPI_RESCHEDULE]++;
}
return IRQ_HANDLED;
}
static void pv_init_ipi(void)
{
int r, swi0;
swi0 = get_percpu_irq(INT_SWI0);
if (swi0 < 0)
panic("SWI0 IRQ mapping failed\n");
irq_set_percpu_devid(swi0);
r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
if (r < 0)
panic("SWI0 IRQ request failed\n");
}
#endif
static bool kvm_para_available(void)
{
static int hypervisor_type;
int config;
if (!hypervisor_type) {
config = read_cpucfg(CPUCFG_KVM_SIG);
if (!memcmp(&config, KVM_SIGNATURE, 4))
hypervisor_type = HYPERVISOR_KVM;
}
return hypervisor_type == HYPERVISOR_KVM;
}
int __init pv_ipi_init(void)
{
int feature;
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
/*
* check whether KVM hypervisor supports pv_ipi or not
*/
feature = read_cpucfg(CPUCFG_KVM_FEATURE);
#ifdef CONFIG_SMP
if (feature & KVM_FEATURE_PV_IPI) {
smp_ops.init_ipi = pv_init_ipi;
smp_ops.send_ipi_single = pv_send_ipi_single;
smp_ops.send_ipi_mask = pv_send_ipi_mask;
}
#endif
return 1;
}

View File

@ -29,6 +29,7 @@
#include <asm/loongson.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/paravirt.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/time.h>
@ -307,6 +308,7 @@ void __init loongson_smp_setup(void)
cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
pv_ipi_init();
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
}
@ -351,7 +353,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
void loongson_init_secondary(void)
{
unsigned int cpu = smp_processor_id();
unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
change_csr_ecfg(ECFG0_IM, imask);