KVM: PPC: Book3S: Allocate guest TCEs on demand too
We already allocate hardware TCE tables in multiple levels and skip
intermediate levels when we can, now it is the turn of the KVM TCE tables.
Thankfully these are already allocated in 2 levels.

This moves the table's last level allocation from the creating helper to
kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such an allocation cannot
be done in real mode, this creates a virtual mode version of
kvmppc_tce_put() which handles allocations.

This adds kvmppc_rm_ioba_validate() to do an additional test of whether
the subsequent kvmppc_tce_put() needs a page which has not been allocated;
if this is the case, we bail out to the virtual mode handlers.

The allocations are protected by a new mutex as kvm->lock is not suitable
for the task because the fault handler is called with the mmap_sem held,
whereas kvmhv_setup_mmu() takes kvm->lock and mmap_sem in the reverse order.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
commit e1a1ef84cd
parent 2001825efc
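The locking scheme described above is a classic double-checked allocation: callers first test the page pointer locklessly and only take the table-private mutex when the backing page is missing, re-checking under the lock before allocating. A minimal userspace sketch of the same pattern (the names tce_table and get_tce_page are illustrative, not the kernel API; the kernel version in the diff below additionally relies on atomic pointer stores):

```c
#include <pthread.h>
#include <stdlib.h>

#define TCES_PER_PAGE 512		/* 4K page / 8-byte TCE, as in the patch */

struct tce_table {
	pthread_mutex_t alloc_lock;	/* plays the role of stt->alloc_lock */
	void *pages[64];		/* last-level pages, allocated on demand */
};

/* Return the backing page for slot 'sttpage', allocating it on first use. */
static void *get_tce_page(struct tce_table *tt, unsigned long sttpage)
{
	void *page = tt->pages[sttpage];

	if (page)			/* fast path: already allocated */
		return page;

	pthread_mutex_lock(&tt->alloc_lock);
	page = tt->pages[sttpage];	/* re-check under the lock */
	if (!page) {
		page = calloc(TCES_PER_PAGE, sizeof(unsigned long));
		if (page)
			tt->pages[sttpage] = page;
	}
	pthread_mutex_unlock(&tt->alloc_lock);

	return page;			/* NULL only if allocation failed */
}

int main(void)
{
	struct tce_table tt = { .alloc_lock = PTHREAD_MUTEX_INITIALIZER };

	return get_tce_page(&tt, 3) ? 0 : 1;	/* first call allocates slot 3 */
}
```

Using a dedicated mutex rather than kvm->lock is what avoids the mmap_sem/kvm->lock ordering inversion the commit message describes.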
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
 	struct kref kref;
 };
 
+#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))
+
 struct kvmppc_spapr_tce_table {
 	struct list_head list;
 	struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
 	u64 offset;		/* in pages */
 	u64 size;		/* window size in pages */
 	struct list_head iommu_tables;
+	struct mutex alloc_lock;
 	struct page *pages[0];
 };
 
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
 	(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
 				(stt)->size, (ioba), (npages)) ? \
 				H_PARAMETER : H_SUCCESS)
-extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
-		unsigned long idx, unsigned long tce);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		unsigned long ioba, unsigned long tce);
 extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
 	unsigned long i, npages = kvmppc_tce_pages(stt->size);
 
 	for (i = 0; i < npages; i++)
-		__free_page(stt->pages[i]);
+		if (stt->pages[i])
+			__free_page(stt->pages[i]);
 
 	kfree(stt);
 }
 
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+		unsigned long sttpage)
+{
+	struct page *page = stt->pages[sttpage];
+
+	if (page)
+		return page;
+
+	mutex_lock(&stt->alloc_lock);
+	page = stt->pages[sttpage];
+	if (!page) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		WARN_ON_ONCE(!page);
+		if (page)
+			stt->pages[sttpage] = page;
+	}
+	mutex_unlock(&stt->alloc_lock);
+
+	return page;
+}
+
 static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
 {
 	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
 	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
 		return VM_FAULT_SIGBUS;
 
-	page = stt->pages[vmf->pgoff];
+	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+	if (!page)
+		return VM_FAULT_OOM;
+
 	get_page(page);
 	vmf->page = page;
 	return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size = args->size;
 	int ret = -ENOMEM;
-	int i;
 
 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
 		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	stt->offset = args->offset;
 	stt->size = size;
 	stt->kvm = kvm;
+	mutex_init(&stt->alloc_lock);
 	INIT_LIST_HEAD_RCU(&stt->iommu_tables);
 
-	for (i = 0; i < npages; i++) {
-		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-		if (!stt->pages[i])
-			goto fail;
-	}
-
 	mutex_lock(&kvm->lock);
 
 	/* Check this LIOBN hasn't been previously allocated */
@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	if (ret >= 0)
 		return ret;
 
- fail:
-	for (i = 0; i < npages; i++)
-		if (stt->pages[i])
-			__free_page(stt->pages[i]);
-
 	kfree(stt);
  fail_acct:
 	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
 	return H_SUCCESS;
 }
 
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+		unsigned long idx, unsigned long tce)
+{
+	struct page *page;
+	u64 *tbl;
+	unsigned long sttpage;
+
+	idx -= stt->offset;
+	sttpage = idx / TCES_PER_PAGE;
+	page = stt->pages[sttpage];
+
+	if (!page) {
+		/* We allow any TCE, not just with read|write permissions */
+		if (!tce)
+			return;
+
+		page = kvm_spapr_get_tce_page(stt, sttpage);
+		if (!page)
+			return;
+	}
+	tbl = page_to_virt(page);
+
+	tbl[idx % TCES_PER_PAGE] = tce;
+}
+
 static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
 		unsigned long entry)
 {
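The virtual-mode kvmppc_tce_put() above exploits the fact that a never-allocated page is equivalent to a page full of zero TCEs: storing 0 into a missing page is a no-op, and (as the kvmppc_h_get_tce() hunk further down shows) reading from one simply returns 0. A minimal userspace sketch of these sparse-table semantics (sparse_table, sparse_get and sparse_put are hypothetical names, not the kernel API):

```c
#include <stdint.h>
#include <stdlib.h>

#define TCES_PER_PAGE 512

/* A sparse two-level table: a NULL page reads as all-zero entries. */
struct sparse_table {
	uint64_t *pages[64];
};

static uint64_t sparse_get(struct sparse_table *t, unsigned long idx)
{
	uint64_t *pg = t->pages[idx / TCES_PER_PAGE];

	return pg ? pg[idx % TCES_PER_PAGE] : 0;	/* missing page == zeroes */
}

static void sparse_put(struct sparse_table *t, unsigned long idx, uint64_t tce)
{
	unsigned long pgnum = idx / TCES_PER_PAGE;

	if (!t->pages[pgnum]) {
		if (!tce)		/* clearing an absent entry: nothing to do */
			return;
		t->pages[pgnum] = calloc(TCES_PER_PAGE, sizeof(uint64_t));
		if (!t->pages[pgnum])
			return;		/* allocation failure drops the update */
	}
	t->pages[pgnum][idx % TCES_PER_PAGE] = tce;
}

int main(void)
{
	struct sparse_table t = { { 0 } };

	sparse_put(&t, 510, 0);			/* no-op: no page gets allocated */
	sparse_put(&t, 510, 0x1000ULL | 3);	/* allocates page 0, stores TCE */
	return sparse_get(&t, 510) ? 0 : 1;
}
```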
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -66,8 +66,6 @@
 
 #endif
 
-#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))
-
 /*
  * Finds a TCE table descriptor by LIOBN.
  *
@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
 
 	return H_SUCCESS;
 }
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 /* Note on the use of page_address() in real mode,
  *
@@ -180,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
 /*
  * Handles TCE requests for emulated devices.
  * Puts guest TCE values to the table and expects user space to convert them.
- * Called in both real and virtual modes.
- * Cannot fail so kvmppc_tce_validate must be called before it.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
+ * Cannot fail so kvmppc_rm_tce_validate must be called before it.
  */
-void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
 		unsigned long idx, unsigned long tce)
 {
 	struct page *page;
@@ -194,13 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
 
 	idx -= stt->offset;
 	page = stt->pages[idx / TCES_PER_PAGE];
+	/*
+	 * page must not be NULL in real mode,
+	 * kvmppc_rm_ioba_validate() must have taken care of this.
+	 */
+	WARN_ON_ONCE_RM(!page);
 	tbl = kvmppc_page_address(page);
 
 	tbl[idx % TCES_PER_PAGE] = tce;
 }
-EXPORT_SYMBOL_GPL(kvmppc_tce_put);
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
+ * in real mode.
+ * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
+ * allocated or not required (when clearing a tce entry).
+ */
+static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+		unsigned long ioba, unsigned long npages, bool clearing)
+{
+	unsigned long i, idx, sttpage, sttpages;
+	unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
+
+	if (ret)
+		return ret;
+	/*
+	 * clearing==true says kvmppc_rm_tce_put won't be allocating pages
+	 * for empty tces.
+	 */
+	if (clearing)
+		return H_SUCCESS;
+
+	idx = (ioba >> stt->page_shift) - stt->offset;
+	sttpage = idx / TCES_PER_PAGE;
+	sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+			TCES_PER_PAGE;
+	for (i = sttpage; i < sttpage + sttpages; ++i)
+		if (!stt->pages[i])
+			return H_TOO_HARD;
+
+	return H_SUCCESS;
+}
+
 static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
 		unsigned long entry, unsigned long *hpa,
 		enum dma_data_direction *direction)
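The page-span arithmetic in kvmppc_rm_ioba_validate() is the subtle part: a run of npages entries starting at idx can straddle a backing-page boundary even when npages is smaller than TCES_PER_PAGE. A standalone sketch of the same computation with worked numbers (TCES_PER_PAGE is 512 for a 4K host page and 8-byte TCEs; ppc64 kernels often run with 64K pages, where it would be 8192):

```c
#include <assert.h>

#define TCES_PER_PAGE	512UL	/* 4K page / sizeof(u64) */

/* Round 'v' up to a multiple of 'a' (same as the kernel's _ALIGN_UP). */
static unsigned long align_up(unsigned long v, unsigned long a)
{
	return (v + a - 1) & ~(a - 1);
}

/* How many last-level pages does a run of 'npages' entries at 'idx' touch? */
static unsigned long span_pages(unsigned long idx, unsigned long npages)
{
	return align_up(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
			TCES_PER_PAGE;
}

int main(void)
{
	/* 4 entries starting at entry 510 straddle backing pages 0 and 1 ... */
	assert(span_pages(510, 4) == 2);
	/* ... while 4 entries starting at entry 0 need only page 0. */
	assert(span_pages(0, 4) == 1);
	return 0;
}
```

Every page in [sttpage, sttpage + sttpages) must already exist; otherwise the real-mode handler returns H_TOO_HARD and the hypercall is retried in virtual mode, where kvmppc_tce_put() can allocate.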
@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	if (!stt)
 		return H_TOO_HARD;
 
-	ret = kvmppc_ioba_validate(stt, ioba, 1);
+	ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
 	if (ret != H_SUCCESS)
 		return ret;
 
@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		}
 	}
 
-	kvmppc_tce_put(stt, entry, tce);
+	kvmppc_rm_tce_put(stt, entry, tce);
 
 	return H_SUCCESS;
 }
@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	if (tce_list & (SZ_4K - 1))
 		return H_PARAMETER;
 
-	ret = kvmppc_ioba_validate(stt, ioba, npages);
+	ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
 	if (ret != H_SUCCESS)
 		return ret;
 
@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 			}
 		}
 
-		kvmppc_tce_put(stt, entry + i, tce);
+		kvmppc_rm_tce_put(stt, entry + i, tce);
 	}
 
 unlock_exit:
@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	if (!stt)
 		return H_TOO_HARD;
 
-	ret = kvmppc_ioba_validate(stt, ioba, npages);
+	ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
 	if (ret != H_SUCCESS)
 		return ret;
 
@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	}
 
 	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
-		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+		kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
 	return H_SUCCESS;
 }
@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 
 	idx = (ioba >> stt->page_shift) - stt->offset;
 	page = stt->pages[idx / TCES_PER_PAGE];
+	if (!page) {
+		vcpu->arch.regs.gpr[4] = 0;
+		return H_SUCCESS;
+	}
 	tbl = (u64 *)page_address(page);
 
 	vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];