powerpc/powernv/sriov: Make single PE mode a per-BAR setting
Using single PE BARs to map an SR-IOV BAR is really a choice about what strategy to use when mapping a BAR. It doesn't make much sense for this to be a global setting since a device might have one large BAR which needs to be mapped with single PE windows and another smaller BAR that can be mapped with a regular segmented window. Make the segmented vs single decision a per-BAR setting and clean up the logic that decides which mode to use. Signed-off-by: Oliver O'Halloran <oohall@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20200722065715.1432738-15-oohall@gmail.com
This commit is contained in:
parent
a0be516f81
commit
4c51f3e1e8
|
@ -146,21 +146,17 @@
|
|||
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
|
||||
{
|
||||
struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
|
||||
const resource_size_t gate = phb->ioda.m64_segsize >> 2;
|
||||
struct resource *res;
|
||||
int i;
|
||||
resource_size_t size, total_vf_bar_sz;
|
||||
resource_size_t vf_bar_sz;
|
||||
struct pnv_iov_data *iov;
|
||||
int mul, total_vfs;
|
||||
int mul;
|
||||
|
||||
iov = kzalloc(sizeof(*iov), GFP_KERNEL);
|
||||
if (!iov)
|
||||
goto disable_iov;
|
||||
pdev->dev.archdata.iov_data = iov;
|
||||
|
||||
total_vfs = pci_sriov_get_totalvfs(pdev);
|
||||
mul = phb->ioda.total_pe_num;
|
||||
total_vf_bar_sz = 0;
|
||||
|
||||
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
|
||||
res = &pdev->resource[i + PCI_IOV_RESOURCES];
|
||||
|
@ -172,50 +168,50 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
|
|||
goto disable_iov;
|
||||
}
|
||||
|
||||
total_vf_bar_sz += pci_iov_resource_size(pdev,
|
||||
i + PCI_IOV_RESOURCES);
|
||||
vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
|
||||
|
||||
/*
|
||||
* If bigger than quarter of M64 segment size, just round up
|
||||
* power of two.
|
||||
* Generally, one segmented M64 BAR maps one IOV BAR. However,
|
||||
* if a VF BAR is too large we end up wasting a lot of space.
|
||||
* If each VF needs more than 1/4 of the default m64 segment
|
||||
* then each VF BAR should be mapped in single-PE mode to reduce
|
||||
* the amount of space required. This does however limit the
|
||||
* number of VFs we can support.
|
||||
*
|
||||
* Generally, one M64 BAR maps one IOV BAR. To avoid conflict
|
||||
* with other devices, IOV BAR size is expanded to be
|
||||
* (total_pe * VF_BAR_size). When VF_BAR_size is half of M64
|
||||
* segment size , the expanded size would equal to half of the
|
||||
* whole M64 space size, which will exhaust the M64 Space and
|
||||
* limit the system flexibility. This is a design decision to
|
||||
* set the boundary to quarter of the M64 segment size.
|
||||
* The 1/4 limit is arbitrary and can be tweaked.
|
||||
*/
|
||||
if (total_vf_bar_sz > gate) {
|
||||
mul = roundup_pow_of_two(total_vfs);
|
||||
dev_info(&pdev->dev,
|
||||
"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
|
||||
total_vf_bar_sz, gate, mul);
|
||||
iov->m64_single_mode = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
|
||||
/*
|
||||
* On PHB3, the minimum size alignment of M64 BAR in
|
||||
* single mode is 32MB. If this VF BAR is smaller than
|
||||
* 32MB, but still too large for a segmented window
|
||||
* then we can't map it and need to disable SR-IOV for
|
||||
* this device.
|
||||
*/
|
||||
if (vf_bar_sz < SZ_32M) {
|
||||
pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
|
||||
i, res);
|
||||
goto disable_iov;
|
||||
}
|
||||
|
||||
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
|
||||
res = &pdev->resource[i + PCI_IOV_RESOURCES];
|
||||
if (!res->flags || res->parent)
|
||||
iov->m64_single_mode[i] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
|
||||
/*
|
||||
* On PHB3, the minimum size alignment of M64 BAR in single
|
||||
* mode is 32MB.
|
||||
* This BAR can be mapped with one segmented window, so adjust
|
||||
* the resource size to accommodate.
|
||||
*/
|
||||
if (iov->m64_single_mode && (size < SZ_32M))
|
||||
goto disable_iov;
|
||||
pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
|
||||
res->end = res->start + vf_bar_sz * mul - 1;
|
||||
pci_dbg(pdev, " %pR\n", res);
|
||||
|
||||
dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
|
||||
res->end = res->start + size * mul - 1;
|
||||
dev_dbg(&pdev->dev, " %pR\n", res);
|
||||
dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
|
||||
pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
|
||||
i, res, mul);
|
||||
|
||||
iov->need_shift = true;
|
||||
}
|
||||
|
||||
iov->vfs_expanded = mul;
|
||||
|
||||
return;
|
||||
|
@ -259,42 +255,40 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
|
|||
resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
|
||||
int resno)
|
||||
{
|
||||
struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
|
||||
struct pnv_iov_data *iov = pnv_iov_get(pdev);
|
||||
resource_size_t align;
|
||||
|
||||
/*
|
||||
* iov can be null if we have an SR-IOV device with IOV BAR that can't
|
||||
* be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
|
||||
* In that case we don't allow VFs to be enabled since one of their
|
||||
* BARs would not be placed in the correct PE.
|
||||
*/
|
||||
if (!iov)
|
||||
return align;
|
||||
if (!iov->vfs_expanded)
|
||||
return align;
|
||||
|
||||
align = pci_iov_resource_size(pdev, resno);
|
||||
|
||||
/*
|
||||
* If we're using single mode then we can just use the native VF BAR
|
||||
* alignment. We validated that it's possible to use a single PE
|
||||
* window above when we did the fixup.
|
||||
*/
|
||||
if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES])
|
||||
return align;
|
||||
|
||||
/*
|
||||
* On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
|
||||
* SR-IOV. While from hardware perspective, the range mapped by M64
|
||||
* BAR should be size aligned.
|
||||
*
|
||||
* When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
|
||||
* powernv-specific hardware restriction is gone. But if just use the
|
||||
* VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
|
||||
* in one segment of M64 #15, which introduces the PE conflict between
|
||||
* PF and VF. Based on this, the minimum alignment of an IOV BAR is
|
||||
* m64_segsize.
|
||||
*
|
||||
* This function returns the total IOV BAR size if M64 BAR is in
|
||||
* Shared PE mode or just VF BAR size if not.
|
||||
* If the M64 BAR is in Single PE mode, return the VF BAR size or
|
||||
* M64 segment size if IOV BAR size is less.
|
||||
*/
|
||||
align = pci_iov_resource_size(pdev, resno);
|
||||
|
||||
/*
|
||||
* iov can be null if we have an SR-IOV device with IOV BAR that can't
|
||||
* be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
|
||||
* In that case we don't allow VFs to be enabled so just return the
|
||||
* default alignment.
|
||||
*/
|
||||
if (!iov)
|
||||
return align;
|
||||
if (!iov->vfs_expanded)
|
||||
return align;
|
||||
if (iov->m64_single_mode)
|
||||
return max(align, (resource_size_t)phb->ioda.m64_segsize);
|
||||
|
||||
return iov->vfs_expanded * align;
|
||||
}
|
||||
|
||||
|
@ -449,7 +443,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
|
|||
continue;
|
||||
|
||||
/* don't need single mode? map everything in one go! */
|
||||
if (!iov->m64_single_mode) {
|
||||
if (!iov->m64_single_mode[i]) {
|
||||
win = pnv_pci_alloc_m64_bar(phb, iov);
|
||||
if (win < 0)
|
||||
goto m64_failed;
|
||||
|
@ -542,6 +536,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
|
|||
res = &dev->resource[i + PCI_IOV_RESOURCES];
|
||||
if (!res->flags || !res->parent)
|
||||
continue;
|
||||
if (iov->m64_single_mode[i])
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The actual IOV BAR range is determined by the start address
|
||||
|
@ -573,6 +569,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
|
|||
res = &dev->resource[i + PCI_IOV_RESOURCES];
|
||||
if (!res->flags || !res->parent)
|
||||
continue;
|
||||
if (iov->m64_single_mode[i])
|
||||
continue;
|
||||
|
||||
size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
|
||||
res2 = *res;
|
||||
|
@ -618,8 +616,8 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
|
|||
/* Release VF PEs */
|
||||
pnv_ioda_release_vf_PE(pdev);
|
||||
|
||||
/* Un-shift the IOV BAR resources */
|
||||
if (!iov->m64_single_mode)
|
||||
/* Un-shift the IOV BARs if we need to */
|
||||
if (iov->need_shift)
|
||||
pnv_pci_vf_resource_shift(pdev, -base_pe);
|
||||
|
||||
/* Release M64 windows */
|
||||
|
@ -736,9 +734,8 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
|
|||
* the IOV BAR according to the PE# allocated to the VFs.
|
||||
* Otherwise, the PE# for the VF will conflict with others.
|
||||
*/
|
||||
if (!iov->m64_single_mode) {
|
||||
ret = pnv_pci_vf_resource_shift(pdev,
|
||||
base_pe->pe_number);
|
||||
if (iov->need_shift) {
|
||||
ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number);
|
||||
if (ret)
|
||||
goto shift_failed;
|
||||
}
|
||||
|
|
|
@ -243,8 +243,15 @@ struct pnv_iov_data {
|
|||
/* pointer to the array of VF PEs. num_vfs long*/
|
||||
struct pnv_ioda_pe *vf_pe_arr;
|
||||
|
||||
/* Did we map the VF BARs with single-PE IODA BARs? */
|
||||
bool m64_single_mode;
|
||||
/* Did we map the VF BAR with single-PE IODA BARs? */
|
||||
bool m64_single_mode[PCI_SRIOV_NUM_BARS];
|
||||
|
||||
/*
|
||||
* True if we're using any segmented windows. In that case we need to
|
||||
* shift the start of the IOV resource to the segment corresponding to
|
||||
* the allocated PE.
|
||||
*/
|
||||
bool need_shift;
|
||||
|
||||
/*
|
||||
* Bit mask used to track which m64 windows are used to map the
|
||||
|
|
Loading…
Reference in New Issue