KVM: s390: ioctls to get and set guest storage attributes
* Add the struct used in the ioctls to get and set CMMA attributes. * Add the two functions needed to get and set the CMMA attributes for guest pages. * Add the two ioctls that use the aforementioned functions. Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
This commit is contained in:
parent
190df4a212
commit
4036e3874a
|
@ -3255,6 +3255,141 @@ Otherwise, if the MCE is a corrected error, KVM will just
|
|||
store it in the corresponding bank (provided this bank is
|
||||
not holding a previously reported uncorrected error).
|
||||
|
||||
4.107 KVM_S390_GET_CMMA_BITS
|
||||
|
||||
Capability: KVM_CAP_S390_CMMA_MIGRATION
|
||||
Architectures: s390
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_s390_cmma_log (in, out)
|
||||
Returns: 0 on success, a negative value on error
|
||||
|
||||
This ioctl is used to get the values of the CMMA bits on the s390
|
||||
architecture. It is meant to be used in two scenarios:
|
||||
- During live migration to save the CMMA values. Live migration needs
|
||||
to be enabled via the KVM_REQ_START_MIGRATION VM property.
|
||||
- To non-destructively peek at the CMMA values, with the flag
|
||||
KVM_S390_CMMA_PEEK set.
|
||||
|
||||
The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired
|
||||
values are written to a buffer whose location is indicated via the "values"
|
||||
member in the kvm_s390_cmma_log struct. The values in the input struct are
|
||||
also updated as needed.
|
||||
Each CMMA value takes up one byte.
|
||||
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
start_gfn is the number of the first guest frame whose CMMA values are
|
||||
to be retrieved,
|
||||
|
||||
count is the length of the buffer in bytes,
|
||||
|
||||
values points to the buffer where the result will be written to.
|
||||
|
||||
If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be
|
||||
KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with
|
||||
other ioctls.
|
||||
|
||||
The result is written in the buffer pointed to by the field values, and
|
||||
the values of the input parameter are updated as follows.
|
||||
|
||||
Depending on the flags, different actions are performed. The only
|
||||
supported flag so far is KVM_S390_CMMA_PEEK.
|
||||
|
||||
The default behaviour if KVM_S390_CMMA_PEEK is not set is:
|
||||
start_gfn will indicate the first page frame whose CMMA bits were dirty.
|
||||
It is not necessarily the same as the one passed as input, as clean pages
|
||||
are skipped.
|
||||
|
||||
count will indicate the number of bytes actually written in the buffer.
|
||||
It can (and very often will) be smaller than the input value, since the
|
||||
buffer is only filled until 16 bytes of clean values are found (which
|
||||
are then not copied in the buffer). Since a CMMA migration block needs
|
||||
the base address and the length, for a total of 16 bytes, we will send
|
||||
back some clean data if there is some dirty data afterwards, as long as
|
||||
the size of the clean data does not exceed the size of the header. This
|
||||
allows to minimize the amount of data to be saved or transferred over
|
||||
the network at the expense of more roundtrips to userspace. The next
|
||||
invocation of the ioctl will skip over all the clean values, saving
|
||||
potentially more than just the 16 bytes we found.
|
||||
|
||||
If KVM_S390_CMMA_PEEK is set:
|
||||
the existing storage attributes are read even when not in migration
|
||||
mode, and no other action is performed;
|
||||
|
||||
the output start_gfn will be equal to the input start_gfn,
|
||||
|
||||
the output count will be equal to the input count, except if the end of
|
||||
memory has been reached.
|
||||
|
||||
In both cases:
|
||||
the field "remaining" will indicate the total number of dirty CMMA values
|
||||
still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is
|
||||
not enabled.
|
||||
|
||||
mask is unused.
|
||||
|
||||
values points to the userspace buffer where the result will be stored.
|
||||
|
||||
This ioctl can fail with -ENOMEM if not enough memory can be allocated to
|
||||
complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
|
||||
KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
|
||||
-EFAULT if the userspace address is invalid or if no page table is
|
||||
present for the addresses (e.g. when using hugepages).
|
||||
|
||||
4.108 KVM_S390_SET_CMMA_BITS
|
||||
|
||||
Capability: KVM_CAP_S390_CMMA_MIGRATION
|
||||
Architectures: s390
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_s390_cmma_log (in)
|
||||
Returns: 0 on success, a negative value on error
|
||||
|
||||
This ioctl is used to set the values of the CMMA bits on the s390
|
||||
architecture. It is meant to be used during live migration to restore
|
||||
the CMMA values, but there are no restrictions on its use.
|
||||
The ioctl takes parameters via the kvm_s390_cmma_log struct.
|
||||
Each CMMA value takes up one byte.
|
||||
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
start_gfn indicates the starting guest frame number,
|
||||
|
||||
count indicates how many values are to be considered in the buffer,
|
||||
|
||||
flags is not used and must be 0.
|
||||
|
||||
mask indicates which PGSTE bits are to be considered.
|
||||
|
||||
remaining is not used.
|
||||
|
||||
values points to the buffer in userspace from which the values to be set
are read.
|
||||
|
||||
This ioctl can fail with -ENOMEM if not enough memory can be allocated to
|
||||
complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
|
||||
the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or
|
||||
if the flags field was not 0, with -EFAULT if the userspace address is
|
||||
invalid, if invalid pages are written to (e.g. after the end of memory)
|
||||
or if no page table is present for the addresses (e.g. when using
|
||||
hugepages).
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
|
|
|
@ -30,8 +30,8 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/sched/signal.h>
|
||||
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/lowcore.h>
|
||||
#include <asm/stp.h>
|
||||
|
@ -387,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_S390_SKEYS:
|
||||
case KVM_CAP_S390_IRQ_STATE:
|
||||
case KVM_CAP_S390_USER_INSTR0:
|
||||
case KVM_CAP_S390_CMMA_MIGRATION:
|
||||
case KVM_CAP_S390_AIS:
|
||||
r = 1;
|
||||
break;
|
||||
|
@ -1419,6 +1420,182 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Base address and length must be sent at the start of each block, therefore
|
||||
* it's cheaper to send some clean data, as long as it's less than the size of
|
||||
* two longs.
|
||||
*/
|
||||
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
|
||||
/* upper bound for the CMMA values buffer; KVM_S390_SKEYS_MAX is re-used for consistency with other ioctls */
|
||||
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
|
||||
|
||||
/*
|
||||
* This function searches for the next page with dirty CMMA attributes, and
|
||||
* saves the attributes in the buffer up to either the end of the buffer or
|
||||
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
|
||||
* no trailing clean bytes are saved.
|
||||
* In case no dirty bits were found, or if CMMA was not enabled or used, the
|
||||
* output buffer will indicate 0 as length.
|
||||
*/
|
||||
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
||||
struct kvm_s390_cmma_log *args)
|
||||
{
|
||||
struct kvm_s390_migration_state *s = kvm->arch.migration_state;
|
||||
unsigned long bufsize, hva, pgstev, i, next, cur;
|
||||
int srcu_idx, peek, r = 0, rr;
|
||||
u8 *res;
|
||||
|
||||
cur = args->start_gfn;
|
||||
i = next = pgstev = 0;
|
||||
|
||||
if (unlikely(!kvm->arch.use_cmma))
|
||||
return -ENXIO;
|
||||
/* Invalid/unsupported flags were specified */
|
||||
if (args->flags & ~KVM_S390_CMMA_PEEK)
|
||||
return -EINVAL;
|
||||
/* Migration mode query, and we are not doing a migration */
|
||||
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
|
||||
if (!peek && !s)
|
||||
return -EINVAL;
|
||||
/* CMMA is disabled or was not used, or the buffer has length zero */
|
||||
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
|
||||
if (!bufsize || !kvm->mm->context.use_cmma) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!peek) {
|
||||
/* We are not peeking, and there are no dirty pages */
|
||||
if (!atomic64_read(&s->dirty_pages)) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
|
||||
args->start_gfn);
|
||||
if (cur >= s->bitmap_size) /* nothing found, loop back */
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
|
||||
if (cur >= s->bitmap_size) { /* again! (very unlikely) */
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
|
||||
}
|
||||
|
||||
res = vmalloc(bufsize);
|
||||
if (!res)
|
||||
return -ENOMEM;
|
||||
|
||||
args->start_gfn = cur;
|
||||
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
while (i < bufsize) {
|
||||
hva = gfn_to_hva(kvm, cur);
|
||||
if (kvm_is_error_hva(hva)) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
/* decrement only if we actually flipped the bit to 0 */
|
||||
if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
|
||||
atomic64_dec(&s->dirty_pages);
|
||||
r = get_pgste(kvm->mm, hva, &pgstev);
|
||||
if (r < 0)
|
||||
pgstev = 0;
|
||||
/* save the value */
|
||||
res[i++] = (pgstev >> 24) & 0x3;
|
||||
/*
|
||||
* if the next bit is too far away, stop.
|
||||
* if we reached the previous "next", find the next one
|
||||
*/
|
||||
if (!peek) {
|
||||
if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
|
||||
break;
|
||||
if (cur == next)
|
||||
next = find_next_bit(s->pgste_bitmap,
|
||||
s->bitmap_size, cur + 1);
|
||||
/* reached the end of the bitmap or of the buffer, stop */
|
||||
if ((next >= s->bitmap_size) ||
|
||||
(next >= args->start_gfn + bufsize))
|
||||
break;
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
args->count = i;
|
||||
args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
|
||||
|
||||
rr = copy_to_user((void __user *)args->values, res, args->count);
|
||||
if (rr)
|
||||
r = -EFAULT;
|
||||
|
||||
vfree(res);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function sets the CMMA attributes for the given pages. If the input
|
||||
* buffer has zero length, no action is taken, otherwise the attributes are
|
||||
* set and the mm->context.use_cmma flag is set.
|
||||
*/
|
||||
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
|
||||
const struct kvm_s390_cmma_log *args)
|
||||
{
|
||||
unsigned long hva, mask, pgstev, i;
|
||||
uint8_t *bits;
|
||||
int srcu_idx, r = 0;
|
||||
|
||||
mask = args->mask;
|
||||
|
||||
if (!kvm->arch.use_cmma)
|
||||
return -ENXIO;
|
||||
/* invalid/unsupported flags */
|
||||
if (args->flags != 0)
|
||||
return -EINVAL;
|
||||
/* Enforce sane limit on memory allocation */
|
||||
if (args->count > KVM_S390_CMMA_SIZE_MAX)
|
||||
return -EINVAL;
|
||||
/* Nothing to do */
|
||||
if (args->count == 0)
|
||||
return 0;
|
||||
|
||||
bits = vmalloc(sizeof(*bits) * args->count);
|
||||
if (!bits)
|
||||
return -ENOMEM;
|
||||
|
||||
r = copy_from_user(bits, (void __user *)args->values, args->count);
|
||||
if (r) {
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
for (i = 0; i < args->count; i++) {
|
||||
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
||||
if (kvm_is_error_hva(hva)) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
pgstev = bits[i];
|
||||
pgstev = pgstev << 24;
|
||||
mask &= _PGSTE_GPS_USAGE_MASK;
|
||||
set_pgste_bits(kvm->mm, hva, mask, pgstev);
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
|
||||
if (!kvm->mm->context.use_cmma) {
|
||||
down_write(&kvm->mm->mmap_sem);
|
||||
kvm->mm->context.use_cmma = 1;
|
||||
up_write(&kvm->mm->mmap_sem);
|
||||
}
|
||||
out:
|
||||
vfree(bits);
|
||||
return r;
|
||||
}
|
||||
|
||||
long kvm_arch_vm_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
|
@ -1497,6 +1674,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
r = kvm_s390_set_skeys(kvm, &args);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_GET_CMMA_BITS: {
|
||||
struct kvm_s390_cmma_log args;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
break;
|
||||
r = kvm_s390_get_cmma_bits(kvm, &args);
|
||||
if (!r) {
|
||||
r = copy_to_user(argp, &args, sizeof(args));
|
||||
if (r)
|
||||
r = -EFAULT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_S390_SET_CMMA_BITS: {
|
||||
struct kvm_s390_cmma_log args;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
break;
|
||||
r = kvm_s390_set_cmma_bits(kvm, &args);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
|
|
@ -155,6 +155,35 @@ struct kvm_s390_skeys {
|
|||
__u32 reserved[9];
|
||||
};
|
||||
|
||||
#define KVM_S390_CMMA_PEEK (1 << 0)
|
||||
|
||||
/**
 * struct kvm_s390_cmma_log - Used for CMMA migration.
 *
 * Used both for input and output.
 *
 * @start_gfn: Guest page number to start from.
 * @count: Size of the values buffer in bytes; each CMMA value takes up one
 *         byte.
 * @flags: Control operation mode via KVM_S390_CMMA_* flags
 *         (KVM_S390_CMMA_PEEK for KVM_S390_GET_CMMA_BITS; must be 0 for
 *         KVM_S390_SET_CMMA_BITS).
 * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
 *             pages are still remaining.
 * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
 *        in the PGSTE.
 * @values: Userspace address of the values buffer.
 *
 * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
 */
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
/* remaining (GET, output) and mask (SET, input) share the same storage */
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
struct kvm_hyperv_exit {
|
||||
#define KVM_EXIT_HYPERV_SYNIC 1
|
||||
#define KVM_EXIT_HYPERV_HCALL 2
|
||||
|
@ -895,6 +924,7 @@ struct kvm_ppc_resize_hpt {
|
|||
#define KVM_CAP_SPAPR_TCE_VFIO 142
|
||||
#define KVM_CAP_X86_GUEST_MWAIT 143
|
||||
#define KVM_CAP_ARM_USER_IRQ 144
|
||||
#define KVM_CAP_S390_CMMA_MIGRATION 145
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -1318,6 +1348,9 @@ struct kvm_s390_ucas_mapping {
|
|||
#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
|
||||
/* Available with KVM_CAP_X86_SMM */
|
||||
#define KVM_SMI _IO(KVMIO, 0xb7)
|
||||
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
|
||||
/* the argument struct is both read and written back by the kernel: _IOWR */
#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
|
||||
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
|
||||
|
|
Loading…
Reference in New Issue