2019-06-04 16:11:33 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2013-04-18 04:30:00 +08:00
|
|
|
/*
|
|
|
|
* Copyright 2012 Michael Ellerman, IBM Corporation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
|
2016-12-25 03:46:01 +08:00
|
|
|
#include <linux/uaccess.h>
|
2013-04-18 04:30:00 +08:00
|
|
|
#include <asm/kvm_book3s.h>
|
|
|
|
#include <asm/kvm_ppc.h>
|
|
|
|
#include <asm/hvcall.h>
|
|
|
|
#include <asm/rtas.h>
|
2017-04-05 15:54:56 +08:00
|
|
|
#include <asm/xive.h>
|
2013-04-18 04:30:00 +08:00
|
|
|
|
2013-04-18 04:30:26 +08:00
|
|
|
#ifdef CONFIG_KVM_XICS
|
|
|
|
static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
|
|
|
|
{
|
|
|
|
u32 irq, server, priority;
|
|
|
|
int rc;
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) {
|
2013-04-18 04:30:26 +08:00
|
|
|
rc = -3;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
irq = be32_to_cpu(args->args[0]);
|
|
|
|
server = be32_to_cpu(args->args[1]);
|
|
|
|
priority = be32_to_cpu(args->args[2]);
|
2013-04-18 04:30:26 +08:00
|
|
|
|
KVM: PPC: Book3S: Allow XICS emulation to work in nested hosts using XIVE
Currently, the KVM code assumes that if the host kernel is using the
XIVE interrupt controller (the new interrupt controller that first
appeared in POWER9 systems), then the in-kernel XICS emulation will
use the XIVE hardware to deliver interrupts to the guest. However,
this only works when the host is running in hypervisor mode and has
full access to all of the XIVE functionality. It doesn't work in any
nested virtualization scenario, either with PR KVM or nested-HV KVM,
because the XICS-on-XIVE code calls directly into the native-XIVE
routines, which are not initialized and cannot function correctly
because they use OPAL calls, and OPAL is not available in a guest.
This means that using the in-kernel XICS emulation in a nested
hypervisor that is using XIVE as its interrupt controller will cause a
(nested) host kernel crash. To fix this, we change most of the places
where the current code calls xive_enabled() to select between the
XICS-on-XIVE emulation and the plain XICS emulation to call a new
function, xics_on_xive(), which returns false in a guest.
However, there is a further twist. The plain XICS emulation has some
functions which are used in real mode and access the underlying XICS
controller (the interrupt controller of the host) directly. In the
case of a nested hypervisor, this means doing XICS hypercalls
directly. When the nested host is using XIVE as its interrupt
controller, these hypercalls will fail. Therefore this also adds
checks in the places where the XICS emulation wants to access the
underlying interrupt controller directly, and if that is XIVE, makes
the code use the virtual mode fallback paths, which call generic
kernel infrastructure rather than doing direct XICS access.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2019-02-04 19:07:20 +08:00
|
|
|
if (xics_on_xive())
|
2017-04-05 15:54:56 +08:00
|
|
|
rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
|
|
|
|
else
|
|
|
|
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
|
2013-04-18 04:30:26 +08:00
|
|
|
if (rc)
|
|
|
|
rc = -3;
|
|
|
|
out:
|
2014-07-08 03:05:33 +08:00
|
|
|
args->rets[0] = cpu_to_be32(rc);
|
2013-04-18 04:30:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
|
|
|
|
{
|
|
|
|
u32 irq, server, priority;
|
|
|
|
int rc;
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) {
|
2013-04-18 04:30:26 +08:00
|
|
|
rc = -3;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
irq = be32_to_cpu(args->args[0]);
|
2013-04-18 04:30:26 +08:00
|
|
|
|
|
|
|
server = priority = 0;
|
KVM: PPC: Book3S: Allow XICS emulation to work in nested hosts using XIVE
Currently, the KVM code assumes that if the host kernel is using the
XIVE interrupt controller (the new interrupt controller that first
appeared in POWER9 systems), then the in-kernel XICS emulation will
use the XIVE hardware to deliver interrupts to the guest. However,
this only works when the host is running in hypervisor mode and has
full access to all of the XIVE functionality. It doesn't work in any
nested virtualization scenario, either with PR KVM or nested-HV KVM,
because the XICS-on-XIVE code calls directly into the native-XIVE
routines, which are not initialized and cannot function correctly
because they use OPAL calls, and OPAL is not available in a guest.
This means that using the in-kernel XICS emulation in a nested
hypervisor that is using XIVE as its interrupt controller will cause a
(nested) host kernel crash. To fix this, we change most of the places
where the current code calls xive_enabled() to select between the
XICS-on-XIVE emulation and the plain XICS emulation to call a new
function, xics_on_xive(), which returns false in a guest.
However, there is a further twist. The plain XICS emulation has some
functions which are used in real mode and access the underlying XICS
controller (the interrupt controller of the host) directly. In the
case of a nested hypervisor, this means doing XICS hypercalls
directly. When the nested host is using XIVE as its interrupt
controller, these hypercalls will fail. Therefore this also adds
checks in the places where the XICS emulation wants to access the
underlying interrupt controller directly, and if that is XIVE, makes
the code use the virtual mode fallback paths, which call generic
kernel infrastructure rather than doing direct XICS access.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2019-02-04 19:07:20 +08:00
|
|
|
if (xics_on_xive())
|
2017-04-05 15:54:56 +08:00
|
|
|
rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
|
|
|
|
else
|
|
|
|
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
|
2013-04-18 04:30:26 +08:00
|
|
|
if (rc) {
|
|
|
|
rc = -3;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
args->rets[1] = cpu_to_be32(server);
|
|
|
|
args->rets[2] = cpu_to_be32(priority);
|
2013-04-18 04:30:26 +08:00
|
|
|
out:
|
2014-07-08 03:05:33 +08:00
|
|
|
args->rets[0] = cpu_to_be32(rc);
|
2013-04-18 04:30:26 +08:00
|
|
|
}
|
2013-04-18 04:32:04 +08:00
|
|
|
|
|
|
|
static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
|
|
|
|
{
|
|
|
|
u32 irq;
|
|
|
|
int rc;
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
|
2013-04-18 04:32:04 +08:00
|
|
|
rc = -3;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
irq = be32_to_cpu(args->args[0]);
|
2013-04-18 04:32:04 +08:00
|
|
|
|
KVM: PPC: Book3S: Allow XICS emulation to work in nested hosts using XIVE
Currently, the KVM code assumes that if the host kernel is using the
XIVE interrupt controller (the new interrupt controller that first
appeared in POWER9 systems), then the in-kernel XICS emulation will
use the XIVE hardware to deliver interrupts to the guest. However,
this only works when the host is running in hypervisor mode and has
full access to all of the XIVE functionality. It doesn't work in any
nested virtualization scenario, either with PR KVM or nested-HV KVM,
because the XICS-on-XIVE code calls directly into the native-XIVE
routines, which are not initialized and cannot function correctly
because they use OPAL calls, and OPAL is not available in a guest.
This means that using the in-kernel XICS emulation in a nested
hypervisor that is using XIVE as its interrupt controller will cause a
(nested) host kernel crash. To fix this, we change most of the places
where the current code calls xive_enabled() to select between the
XICS-on-XIVE emulation and the plain XICS emulation to call a new
function, xics_on_xive(), which returns false in a guest.
However, there is a further twist. The plain XICS emulation has some
functions which are used in real mode and access the underlying XICS
controller (the interrupt controller of the host) directly. In the
case of a nested hypervisor, this means doing XICS hypercalls
directly. When the nested host is using XIVE as its interrupt
controller, these hypercalls will fail. Therefore this also adds
checks in the places where the XICS emulation wants to access the
underlying interrupt controller directly, and if that is XIVE, makes
the code use the virtual mode fallback paths, which call generic
kernel infrastructure rather than doing direct XICS access.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2019-02-04 19:07:20 +08:00
|
|
|
if (xics_on_xive())
|
2017-04-05 15:54:56 +08:00
|
|
|
rc = kvmppc_xive_int_off(vcpu->kvm, irq);
|
|
|
|
else
|
|
|
|
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
|
2013-04-18 04:32:04 +08:00
|
|
|
if (rc)
|
|
|
|
rc = -3;
|
|
|
|
out:
|
2014-07-08 03:05:33 +08:00
|
|
|
args->rets[0] = cpu_to_be32(rc);
|
2013-04-18 04:32:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
|
|
|
|
{
|
|
|
|
u32 irq;
|
|
|
|
int rc;
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
|
2013-04-18 04:32:04 +08:00
|
|
|
rc = -3;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-07-08 03:05:33 +08:00
|
|
|
irq = be32_to_cpu(args->args[0]);
|
2013-04-18 04:32:04 +08:00
|
|
|
|
KVM: PPC: Book3S: Allow XICS emulation to work in nested hosts using XIVE
Currently, the KVM code assumes that if the host kernel is using the
XIVE interrupt controller (the new interrupt controller that first
appeared in POWER9 systems), then the in-kernel XICS emulation will
use the XIVE hardware to deliver interrupts to the guest. However,
this only works when the host is running in hypervisor mode and has
full access to all of the XIVE functionality. It doesn't work in any
nested virtualization scenario, either with PR KVM or nested-HV KVM,
because the XICS-on-XIVE code calls directly into the native-XIVE
routines, which are not initialized and cannot function correctly
because they use OPAL calls, and OPAL is not available in a guest.
This means that using the in-kernel XICS emulation in a nested
hypervisor that is using XIVE as its interrupt controller will cause a
(nested) host kernel crash. To fix this, we change most of the places
where the current code calls xive_enabled() to select between the
XICS-on-XIVE emulation and the plain XICS emulation to call a new
function, xics_on_xive(), which returns false in a guest.
However, there is a further twist. The plain XICS emulation has some
functions which are used in real mode and access the underlying XICS
controller (the interrupt controller of the host) directly. In the
case of a nested hypervisor, this means doing XICS hypercalls
directly. When the nested host is using XIVE as its interrupt
controller, these hypercalls will fail. Therefore this also adds
checks in the places where the XICS emulation wants to access the
underlying interrupt controller directly, and if that is XIVE, makes
the code use the virtual mode fallback paths, which call generic
kernel infrastructure rather than doing direct XICS access.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2019-02-04 19:07:20 +08:00
|
|
|
if (xics_on_xive())
|
2017-04-05 15:54:56 +08:00
|
|
|
rc = kvmppc_xive_int_on(vcpu->kvm, irq);
|
|
|
|
else
|
|
|
|
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
|
2013-04-18 04:32:04 +08:00
|
|
|
if (rc)
|
|
|
|
rc = -3;
|
|
|
|
out:
|
2014-07-08 03:05:33 +08:00
|
|
|
args->rets[0] = cpu_to_be32(rc);
|
2013-04-18 04:32:04 +08:00
|
|
|
}
|
2013-04-18 04:30:26 +08:00
|
|
|
#endif /* CONFIG_KVM_XICS */
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
/* Binding of an RTAS service name to its in-kernel emulation routine. */
struct rtas_handler {
	/* Emulates the call; reads nargs/args and writes rets in @args. */
	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
	/* RTAS service name, e.g. "ibm,set-xive". */
	char *name;
};
|
|
|
|
|
2013-04-18 04:30:26 +08:00
|
|
|
/*
 * Table of all RTAS services this kernel can emulate.  A token may
 * only be bound (via rtas_token_define()) to a name listed here.
 */
static struct rtas_handler rtas_handlers[] = {
#ifdef CONFIG_KVM_XICS
	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
#endif
};
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
/* Per-VM binding of a userspace-chosen RTAS token to a handler. */
struct rtas_token_definition {
	struct list_head list;		/* link in kvm->arch.rtas_tokens */
	struct rtas_handler *handler;	/* emulation bound to this token */
	u64 token;			/* value matched against args->token */
};
|
|
|
|
|
|
|
|
/*
 * Compare two RTAS service names.  The comparison is bounded by the
 * size of the name field of struct kvm_rtas_token_args, since that is
 * the buffer userspace passes names in through the define-token ioctl.
 * Returns nonzero when the names match within that bound.
 */
static int rtas_name_matches(char *s1, char *s2)
{
	struct kvm_rtas_token_args args;
	return !strncmp(s1, s2, sizeof(args.name));
}
|
|
|
|
|
|
|
|
static int rtas_token_undefine(struct kvm *kvm, char *name)
|
|
|
|
{
|
|
|
|
struct rtas_token_definition *d, *tmp;
|
|
|
|
|
2019-05-29 09:54:00 +08:00
|
|
|
lockdep_assert_held(&kvm->arch.rtas_token_lock);
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
|
|
|
|
if (rtas_name_matches(d->handler->name, name)) {
|
|
|
|
list_del(&d->list);
|
|
|
|
kfree(d);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It's not an error to undefine an undefined token */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Bind @token to the handler named @name for this VM.  Caller must
 * hold kvm->arch.rtas_token_lock.
 *
 * Returns 0 on success, -EEXIST if the token is already bound,
 * -ENOENT if no handler has that name, -ENOMEM on allocation failure.
 */
static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
{
	struct rtas_token_definition *def;
	struct rtas_handler *handler = NULL;
	int i;

	lockdep_assert_held(&kvm->arch.rtas_token_lock);

	/* Each token may be bound at most once. */
	list_for_each_entry(def, &kvm->arch.rtas_tokens, list) {
		if (def->token == token)
			return -EEXIST;
	}

	/* Find the emulation for this service name. */
	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
		if (rtas_name_matches(rtas_handlers[i].name, name)) {
			handler = &rtas_handlers[i];
			break;
		}
	}

	if (!handler)
		return -ENOENT;

	def = kzalloc(sizeof(*def), GFP_KERNEL);
	if (!def)
		return -ENOMEM;

	def->handler = handler;
	def->token = token;

	list_add_tail(&def->list, &kvm->arch.rtas_tokens);

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Handle the KVM_PPC_RTAS_DEFINE_TOKEN ioctl: copy the name/token pair
 * from userspace and define or (for a zero token) undefine the binding.
 * Returns 0 or a negative errno.
 */
int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
{
	struct kvm_rtas_token_args args;
	int ret;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	mutex_lock(&kvm->arch.rtas_token_lock);

	/* A zero token means "drop any binding for this name". */
	ret = args.token ? rtas_token_define(kvm, args.name, args.token)
			 : rtas_token_undefine(kvm, args.name);

	mutex_unlock(&kvm->arch.rtas_token_lock);

	return ret;
}
|
|
|
|
|
|
|
|
int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct rtas_token_definition *d;
|
|
|
|
struct rtas_args args;
|
|
|
|
rtas_arg_t *orig_rets;
|
|
|
|
gpa_t args_phys;
|
|
|
|
int rc;
|
|
|
|
|
2014-03-25 07:47:04 +08:00
|
|
|
/*
|
|
|
|
* r4 contains the guest physical address of the RTAS args
|
|
|
|
* Mask off the top 4 bits since this is a guest real address
|
|
|
|
*/
|
|
|
|
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
|
|
|
|
if (rc)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* args->rets is a pointer into args->args. Now that we've
|
|
|
|
* copied args we need to fix it up to point into our copy,
|
|
|
|
* not the guest args. We also need to save the original
|
|
|
|
* value so we can restore it on the way out.
|
|
|
|
*/
|
|
|
|
orig_rets = args.rets;
|
2014-07-08 03:05:33 +08:00
|
|
|
args.rets = &args.args[be32_to_cpu(args.nargs)];
|
2013-04-18 04:30:00 +08:00
|
|
|
|
2019-05-29 09:54:00 +08:00
|
|
|
mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
rc = -ENOENT;
|
|
|
|
list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
|
2014-07-08 03:05:33 +08:00
|
|
|
if (d->token == be32_to_cpu(args.token)) {
|
2013-04-18 04:30:00 +08:00
|
|
|
d->handler->handler(vcpu, &args);
|
|
|
|
rc = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-29 09:54:00 +08:00
|
|
|
mutex_unlock(&vcpu->kvm->arch.rtas_token_lock);
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
if (rc == 0) {
|
|
|
|
args.rets = orig_rets;
|
|
|
|
rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
|
|
|
|
if (rc)
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
/*
|
|
|
|
* We only get here if the guest has called RTAS with a bogus
|
|
|
|
* args pointer. That means we can't get to the args, and so we
|
|
|
|
* can't fail the RTAS call. So fail right out to userspace,
|
|
|
|
* which should kill the guest.
|
|
|
|
*/
|
|
|
|
return rc;
|
|
|
|
}
|
2013-10-08 00:47:59 +08:00
|
|
|
EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
|
2013-04-18 04:30:00 +08:00
|
|
|
|
|
|
|
void kvmppc_rtas_tokens_free(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct rtas_token_definition *d, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
|
|
|
|
list_del(&d->list);
|
|
|
|
kfree(d);
|
|
|
|
}
|
|
|
|
}
|